# Introduction

In 2017, the Google Research team published a paper called "Attention Is All You Need", which presented the Transformer architecture and was a paradigm shift in Machine Learning, especially in Deep Learning and the field of natural language processing.

The Transformer, with its parallel processing capabilities, allowed for more efficient and scalable models, making it easier to train them on large datasets. It also demonstrated superior performance in several NLP tasks, such as sentiment analysis and text generation tasks.

The architecture presented in this paper served as the foundation for subsequent models like GPT and BERT. Besides NLP, the Transformer architecture is used in other fields, like audio processing and computer vision.

# Imports

In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from torch.utils.tensorboard import SummaryWriter

import math

# HuggingFace libraries
from datasets import load_dataset
from tokenizers import Tokenizer
from tokenizers.models import WordLevel
from tokenizers.trainers import WordLevelTrainer
from tokenizers.pre_tokenizers import Whitespace

from pathlib import Path
from typing import Any
from tqdm import tqdm

import warnings

ImportError: DLL load failed while importing lib: The specified procedure could not be found.

# Transformer Architecture

In [None]:
class InputEmbeddings(nn.Module):
    """Embedding lookup whose output is multiplied by sqrt(d_model)."""

    def __init__(self, d_model: int, vocab_size: int):
        super().__init__()
        # Both sizes are kept as attributes; forward() needs d_model for the scale factor
        self.d_model, self.vocab_size = d_model, vocab_size
        # Learnable table with one d_model-wide row per vocabulary index
        self.embedding = nn.Embedding(vocab_size, d_model)

    def forward(self, x):
        # Look up the rows for the given indices, then scale them by sqrt(d_model)
        scale = math.sqrt(self.d_model)
        return self.embedding(x) * scale

In [None]:
class PositionalEncoding(nn.Module):
    """Adds a fixed sinusoidal position table to its input and applies dropout."""

    def __init__(self, d_model: int, seq_len: int, dropout: float) -> None:
        super().__init__()
        self.d_model = d_model  # Width of each position vector
        self.seq_len = seq_len  # Number of positions stored in the table
        self.dropout = nn.Dropout(dropout)

        # Positions 0 .. seq_len - 1 as a column vector of shape (seq_len, 1)
        position = torch.arange(0, seq_len, dtype=torch.float).unsqueeze(1)

        # One frequency per even column index: exp(-i * ln(10000) / d_model)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))

        # Angle fed to sin/cos for every (position, frequency) combination
        angles = position * div_term

        # Table of shape (seq_len, d_model): sine in even columns, cosine in odd columns
        pe = torch.zeros(seq_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        pe[:, 1::2] = torch.cos(angles)

        # Stored with a leading axis of size 1 as a buffer, i.e. not a trainable parameter
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        # Take as many table rows as x has entries along dimension 1
        positions = self.pe[:, :x.shape[1], :]
        positions.requires_grad_(False)
        return self.dropout(x + positions)

In [None]:
class LayerNormalization(nn.Module):
    """Normalizes the last dimension of its input using one learnable scale and one learnable shift."""

    def __init__(self, eps: float=10**-6) -> None:
        super().__init__()
        # Added to the standard deviation so the division never hits zero
        self.eps = eps

        # Trainable multiplicative term, a single value starting at 1
        self.alpha = nn.Parameter(torch.ones(1))

        # Trainable additive term, a single value starting at 0
        self.bias = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        # Statistics are taken over the last dimension; keepdim lets them broadcast against x
        mean = x.mean(dim=-1, keepdim=True)
        std = x.std(dim=-1, keepdim=True)

        centered = x - mean
        return self.alpha * centered / (std + self.eps) + self.bias

In [None]:
class FeedForwardBlock(nn.Module):
    """Two linear layers with a ReLU and dropout in between: d_model -> d_ff -> d_model."""

    def __init__(self, d_model: int, d_ff: int, dropout: float) -> None:
        super().__init__()
        self.linear_1 = nn.Linear(d_model, d_ff)  # Expansion layer (W1, b1)
        self.dropout = nn.Dropout(dropout)  # Applied to the activated hidden values
        self.linear_2 = nn.Linear(d_ff, d_model)  # Contraction layer (W2, b2)

    def forward(self, x):
        # Last dimension goes d_model -> d_ff -> d_model
        hidden = torch.relu(self.linear_1(x))
        hidden = self.dropout(hidden)
        return self.linear_2(hidden)

In [None]:
class MultiHeadAttentionBlock(nn.Module):
    """Multi-head scaled dot-product attention with learned Q/K/V/output projections."""

    def __init__(self, d_model: int, h: int, dropout: float) -> None:  # h = number of heads
        super().__init__()
        self.d_model = d_model
        self.h = h

        # Every head receives an equal slice of the model dimension, so it must divide evenly
        assert d_model % h == 0, 'd_model is not divisible by h'

        # Width of the query/key/value vectors handled by a single head
        self.d_k = d_model // h

        # Projection matrices
        self.w_q = nn.Linear(d_model, d_model)  # W_q
        self.w_k = nn.Linear(d_model, d_model)  # W_k
        self.w_v = nn.Linear(d_model, d_model)  # W_v
        self.w_o = nn.Linear(d_model, d_model)  # W_o

        self.dropout = nn.Dropout(dropout)  # Applied to the attention weights

    @staticmethod
    def attention(query, key, value, mask, dropout: nn.Dropout):
        """Compute softmax(Q K^T / sqrt(d_k)) V and return (output, attention weights).

        Positions where ``mask == 0`` are filled with -1e9 before the softmax.
        ``mask`` and ``dropout`` may each be ``None``, in which case that step is skipped.
        """
        d_k = query.shape[-1]  # Size of the last dimension of the query

        # Scaled dot product between every query and every key
        scores = (query @ key.transpose(-2, -1)) / math.sqrt(d_k)

        # Hide the masked interactions before normalizing
        if mask is not None:
            scores.masked_fill_(mask == 0, -1e9)

        weights = scores.softmax(dim=-1)
        if dropout is not None:
            weights = dropout(weights)

        return (weights @ value), weights

    def _split_heads(self, projected):
        # Cut the last dimension into h chunks of d_k and move the head axis to position 1
        batch, length = projected.shape[0], projected.shape[1]
        return projected.view(batch, length, self.h, self.d_k).transpose(1, 2)

    def forward(self, q, k, v, mask):
        # Project the inputs (Q', K', V') and split each result across the heads
        query = self._split_heads(self.w_q(q))
        key = self._split_heads(self.w_k(k))
        value = self._split_heads(self.w_v(v))

        # Per-head attention; the weights are kept on the module after each call
        x, self.attention_scores = MultiHeadAttentionBlock.attention(query, key, value, mask, self.dropout)

        # Bring the head axis back next to d_k and merge the heads into one dimension (H matrix)
        batch = x.shape[0]
        merged = x.transpose(1, 2).contiguous().view(batch, -1, self.h * self.d_k)

        # Final output projection with W_o
        return self.w_o(merged)

In [None]:
class ResidualConnection(nn.Module):
    """Skip connection that normalizes the input before handing it to a sublayer."""

    def __init__(self, dropout: float) -> None:
        super().__init__()
        self.dropout = nn.Dropout(dropout)  # Applied to the sublayer output
        self.norm = LayerNormalization()  # Applied to the input before the sublayer

    def forward(self, x, sublayer):
        # Normalize, run the sublayer, apply dropout, then add the untouched input back
        normalized = self.norm(x)
        branch = self.dropout(sublayer(normalized))
        return x + branch

In [None]:
class EncoderBlock(nn.Module):
    """Self-attention followed by a feed-forward block, each wrapped in a residual connection."""

    def __init__(self, self_attention_block: MultiHeadAttentionBlock, feed_forward_block: FeedForwardBlock, dropout: float) -> None:
        super().__init__()
        # Sublayers supplied by the caller
        self.self_attention_block = self_attention_block
        self.feed_forward_block = feed_forward_block
        # One residual wrapper per sublayer, both using the same dropout rate
        self.residual_connections = nn.ModuleList([ResidualConnection(dropout) for _ in range(2)])

    def forward(self, x, src_mask):
        attention_residual, feed_forward_residual = self.residual_connections

        def self_attend(normed):
            # Query, key and value are all the same tensor; src_mask is forwarded as the mask
            return self.self_attention_block(normed, normed, normed, src_mask)

        # First residual connection wraps self-attention, the second wraps the feed-forward block
        x = attention_residual(x, self_attend)
        return feed_forward_residual(x, self.feed_forward_block)

In [None]:
class Encoder(nn.Module):
    """Runs the input through every layer in order and normalizes the final result."""

    def __init__(self, layers: nn.ModuleList) -> None:
        super().__init__()
        self.layers = layers  # The stacked blocks, applied in list order
        self.norm = LayerNormalization()  # Applied once, after the last block

    def forward(self, x, mask):
        hidden = x
        # Each block receives the previous block's output together with the same mask
        for block in self.layers:
            hidden = block(hidden, mask)
        return self.norm(hidden)

In [None]:
class DecoderBlock(nn.Module):
    """Self-attention, cross-attention and feed-forward, each wrapped in a residual connection."""

    def __init__(self,  self_attention_block: MultiHeadAttentionBlock, cross_attention_block: MultiHeadAttentionBlock, feed_forward_block: FeedForwardBlock, dropout: float) -> None:
        super().__init__()
        # Sublayers supplied by the caller
        self.self_attention_block = self_attention_block
        self.cross_attention_block = cross_attention_block
        self.feed_forward_block = feed_forward_block
        # One residual wrapper per sublayer, all using the same dropout rate
        self.residual_connections = nn.ModuleList([ResidualConnection(dropout) for _ in range(3)])

    def forward(self, x, encoder_output, src_mask, tgt_mask):
        self_residual, cross_residual, feed_forward_residual = self.residual_connections

        def masked_self_attention(normed):
            # Query, key and value all come from the decoder stream; tgt_mask is the mask
            return self.self_attention_block(normed, normed, normed, tgt_mask)

        def cross_attention(normed):
            # Queries come from the decoder stream, keys and values from the encoder output
            return self.cross_attention_block(normed, encoder_output, encoder_output, src_mask)

        x = self_residual(x, masked_self_attention)
        x = cross_residual(x, cross_attention)
        return feed_forward_residual(x, self.feed_forward_block)

In [None]:
class Decoder(nn.Module):
    """Runs the input through every layer in order and normalizes the final result."""

    def __init__(self, layers: nn.ModuleList) -> None:
        super().__init__()
        self.layers = layers  # The stacked blocks, applied in list order
        self.norm = LayerNormalization()  # Applied once, after the last block

    def forward(self, x, encoder_output, src_mask, tgt_mask):
        hidden = x
        # Every block gets the running state plus the same encoder output and masks
        for block in self.layers:
            hidden = block(hidden, encoder_output, src_mask, tgt_mask)
        return self.norm(hidden)

In [None]:
class ProjectionLayer(nn.Module):
    """Linear map from d_model to vocab_size followed by a log-softmax over the last dimension."""

    def __init__(self, d_model: int, vocab_size: int) -> None:
        super().__init__()
        # Produces one score per vocabulary entry
        self.proj = nn.Linear(d_model, vocab_size)

    def forward(self, x):
        logits = self.proj(x)
        # Log-probabilities along the last dimension
        return logits.log_softmax(dim=-1)

In [None]:
class Transformer(nn.Module):
    """Bundles the encoder, decoder, embeddings, positional encodings and projection layer.

    The three stages are exposed as separate methods (``encode``, ``decode``, ``project``)
    rather than through a single ``forward``.
    """

    def __init__(self, encoder: Encoder, decoder: Decoder, src_embed: InputEmbeddings, tgt_embed: InputEmbeddings, src_pos: PositionalEncoding, tgt_pos: PositionalEncoding, projection_layer: ProjectionLayer) -> None:
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.src_embed = src_embed
        self.tgt_embed = tgt_embed
        self.src_pos = src_pos
        self.tgt_pos = tgt_pos
        self.projection_layer = projection_layer

    def encode(self, src, src_mask):
        """Embed ``src``, add positional encoding, and run the encoder with ``src_mask``."""
        embedded = self.src_pos(self.src_embed(src))
        return self.encoder(embedded, src_mask)

    def decode(self, encoder_output, src_mask, tgt, tgt_mask):
        """Embed ``tgt``, add positional encoding, and run the decoder.

        The decoder receives the encoder output and both masks alongside the embedded target.
        """
        embedded = self.tgt_pos(self.tgt_embed(tgt))
        return self.decoder(embedded, encoder_output, src_mask, tgt_mask)

    def project(self, x):
        """Pass ``x`` through the projection layer."""
        return self.projection_layer(x)

In [None]:
# Building & initializing the Transformer
# Parameters cover the vocabulary sizes, sequence lengths, model dimension, number of stacked blocks, heads, etc.
def build_transformer(src_vocab_size: int, tgt_vocab_size: int, src_seq_len: int, tgt_seq_len: int, d_model: int = 512, N: int = 6, h: int = 8, dropout: float = 0.1, d_ff: int = 2048) -> Transformer:
    # Embedding layers: one per language, both producing d_model-wide vectors
    src_embed = InputEmbeddings(d_model, src_vocab_size)
    tgt_embed = InputEmbeddings(d_model, tgt_vocab_size)

    # Positional encoding layers, sized by each side's sequence length
    src_pos = PositionalEncoding(d_model, src_seq_len, dropout)
    tgt_pos = PositionalEncoding(d_model, tgt_seq_len, dropout)

    # N encoder blocks, each with its own self-attention and feed-forward sublayers
    encoder_blocks = [
        EncoderBlock(
            MultiHeadAttentionBlock(d_model, h, dropout),  # Self-attention
            FeedForwardBlock(d_model, d_ff, dropout),  # Feed-forward
            dropout,
        )
        for _ in range(N)
    ]

    # N decoder blocks, each with self-attention, cross-attention and feed-forward sublayers
    decoder_blocks = [
        DecoderBlock(
            MultiHeadAttentionBlock(d_model, h, dropout),  # Self-attention
            MultiHeadAttentionBlock(d_model, h, dropout),  # Cross-attention
            FeedForwardBlock(d_model, d_ff, dropout),  # Feed-forward
            dropout,
        )
        for _ in range(N)
    ]

    # Wrap the block lists into the encoder and decoder stacks
    encoder = Encoder(nn.ModuleList(encoder_blocks))
    decoder = Decoder(nn.ModuleList(decoder_blocks))

    # Maps decoder output onto the target vocabulary
    projection_layer = ProjectionLayer(d_model, tgt_vocab_size)

    # Assemble the full model
    transformer = Transformer(encoder, decoder, src_embed, tgt_embed, src_pos, tgt_pos, projection_layer)

    # Xavier-uniform initialization for every parameter with more than one dimension
    for weight in (p for p in transformer.parameters() if p.dim() > 1):
        nn.init.xavier_uniform_(weight)

    return transformer

# Tokenizer

In [None]:
def build_tokenizer(config, ds, lang):
    """Return the tokenizer for ``lang``, loading it from disk or training and saving a new one.

    The file location is ``config['tokenizer_file']`` formatted with ``lang``.
    """
    tokenizer_path = Path(config['tokenizer_file'].format(lang))

    # A tokenizer saved earlier is simply loaded back
    if tokenizer_path.exists():
        return Tokenizer.from_file(str(tokenizer_path))

    # Otherwise build a word-level tokenizer that splits on whitespace
    tokenizer = Tokenizer(WordLevel(unk_token = '[UNK]'))
    tokenizer.pre_tokenizer = Whitespace()

    # Trainer configured with the special tokens and a minimum frequency of 2
    special_tokens = ["[UNK]", "[PAD]", "[SOS]", "[EOS]"]
    trainer = WordLevelTrainer(special_tokens = special_tokens, min_frequency = 2)

    # Train on the sentences of the requested language, then save for later runs
    tokenizer.train_from_iterator(get_all_sentences(ds, lang), trainer = trainer)
    tokenizer.save(str(tokenizer_path))
    return tokenizer

# Dataset

In [None]:
def get_ds(config):
    """Load the OpusBooks language pair named in ``config`` and build the dataloaders.

    Reads ``lang_src``, ``lang_tgt``, ``seq_len`` and ``batch_size`` from ``config``
    (``build_tokenizer`` additionally reads ``tokenizer_file``). As a side effect, the
    longest tokenized source and target sentences found in the raw dataset are printed.

    Returns:
        A tuple ``(train_dataloader, val_dataloader, tokenizer_src, tokenizer_tgt)``.
    """
    lang_src = config['lang_src']
    lang_tgt = config['lang_tgt']

    # Loading the train portion of the OpusBooks dataset for the configured language pair
    ds_raw = load_dataset('opus_books', f'{lang_src}-{lang_tgt}', split = 'train')

    # Building or loading the tokenizer for each language
    tokenizer_src = build_tokenizer(config, ds_raw, lang_src)
    tokenizer_tgt = build_tokenizer(config, ds_raw, lang_tgt)

    # Random 90% / 10% split into training and validation subsets
    train_ds_size = int(0.9 * len(ds_raw))
    val_ds_size = len(ds_raw) - train_ds_size
    train_ds_raw, val_ds_raw = random_split(ds_raw, [train_ds_size, val_ds_size])

    # Wrapping both subsets with BilingualDataset
    train_ds = BilingualDataset(train_ds_raw, tokenizer_src, tokenizer_tgt, lang_src, lang_tgt, config['seq_len'])
    val_ds = BilingualDataset(val_ds_raw, tokenizer_src, tokenizer_tgt, lang_src, lang_tgt, config['seq_len'])

    # Longest tokenized sentence per language over the whole raw dataset.
    # Each side is measured with its own tokenizer; the target side previously
    # went through the source tokenizer, which made the reported length wrong.
    max_len_src = 0
    max_len_tgt = 0
    for pair in ds_raw:
        translation = pair['translation']
        src_ids = tokenizer_src.encode(translation[lang_src]).ids
        tgt_ids = tokenizer_tgt.encode(translation[lang_tgt]).ids
        max_len_src = max(max_len_src, len(src_ids))
        max_len_tgt = max(max_len_tgt, len(tgt_ids))

    print(f'Max length of source sentence: {max_len_src}')
    print(f'Max length of target sentence: {max_len_tgt}')

    # Training batches use the configured batch size; validation yields one pair at a time
    train_dataloader = DataLoader(train_ds, batch_size = config['batch_size'], shuffle = True)
    val_dataloader = DataLoader(val_ds, batch_size = 1, shuffle = True)

    return train_dataloader, val_dataloader, tokenizer_src, tokenizer_tgt

In [None]:
def casual_mask(size):
    """Return a boolean (1, size, size) mask that is True on and below the diagonal."""
    # Keep the lower triangle (diagonal included) of an all-ones matrix, then turn it into booleans
    lower_triangle = torch.tril(torch.ones(1, size, size, dtype=torch.int))
    return lower_triangle == 1

In [None]:
class BilingualDataset(Dataset):
    """Sentence-pair dataset that yields fixed-length encoder/decoder tensors and masks.

    Takes the raw pairs, one tokenizer per language, the two language keys, and
    ``seq_len``, the common length of every produced tensor.
    """

    def __init__(self, ds, tokenizer_src, tokenizer_tgt, src_lang, tgt_lang, seq_len) -> None:
        super().__init__()

        self.seq_len = seq_len
        self.tokenizer_src = tokenizer_src
        self.tokenizer_tgt = tokenizer_tgt
        self.src_lang = src_lang
        self.tgt_lang = tgt_lang

        # Special token ids are looked up in the target-language tokenizer
        self.sos_token = torch.tensor([tokenizer_tgt.token_to_id("[SOS]")], dtype=torch.int64)
        self.eos_token = torch.tensor([tokenizer_tgt.token_to_id("[EOS]")], dtype=torch.int64)
        self.pad_token = torch.tensor([tokenizer_tgt.token_to_id("[PAD]")], dtype=torch.int64)

        # Only pairs that fit into seq_len once the special tokens are added are kept
        self.ds = [pair for pair in ds if self._fits(pair)]

    def _fits(self, pair):
        # Tokenize both sentences and check the token counts against seq_len
        translation = pair['translation']
        src_ids = self.tokenizer_src.encode(translation[self.src_lang]).ids
        tgt_ids = self.tokenizer_tgt.encode(translation[self.tgt_lang]).ids
        # The source side needs room for [SOS] and [EOS], the target side for one special token
        return len(src_ids) + 2 <= self.seq_len and len(tgt_ids) + 1 <= self.seq_len

    def _padding(self, count):
        # Tensor holding `count` copies of the padding token id
        return torch.tensor([self.pad_token] * count, dtype = torch.int64)

    # Number of pairs that survived the length filter
    def __len__(self):
        return len(self.ds)

    # Builds the tensors, masks and raw texts for the pair stored at 'index'
    def __getitem__(self, index: Any) -> Any:
        record = self.ds[index]
        src_text = record['translation'][self.src_lang]
        tgt_text = record['translation'][self.tgt_lang]

        # Token ids of both sentences
        src_ids = self.tokenizer_src.encode(src_text).ids
        tgt_ids = self.tokenizer_tgt.encode(tgt_text).ids

        # Padding needed on each side: the encoder input also carries [SOS] and [EOS],
        # while the decoder input carries only [SOS] and the label only [EOS]
        enc_pad_count = self.seq_len - len(src_ids) - 2
        dec_pad_count = self.seq_len - len(tgt_ids) - 1

        # A negative count means the sentence does not fit into seq_len
        if min(enc_pad_count, dec_pad_count) < 0:
            raise ValueError('Sentence is too long')

        src_tensor = torch.tensor(src_ids, dtype = torch.int64)
        tgt_tensor = torch.tensor(tgt_ids, dtype = torch.int64)
        enc_padding = self._padding(enc_pad_count)
        dec_padding = self._padding(dec_pad_count)

        # Encoder input: [SOS] + source tokens + [EOS] + padding
        encoder_input = torch.cat([self.sos_token, src_tensor, self.eos_token, enc_padding])

        # Decoder input: [SOS] + target tokens + padding
        decoder_input = torch.cat([self.sos_token, tgt_tensor, dec_padding])

        # Label (expected output): target tokens + [EOS] + padding
        label = torch.cat([tgt_tensor, self.eos_token, dec_padding])

        # All three tensors must come out exactly seq_len long
        assert encoder_input.size(0) == self.seq_len
        assert decoder_input.size(0) == self.seq_len
        assert label.size(0) == self.seq_len

        # 1 where a real token sits, 0 on padding; two leading axes of size 1 are added
        encoder_mask = (encoder_input != self.pad_token).unsqueeze(0).unsqueeze(0).int()
        decoder_padding_mask = (decoder_input != self.pad_token).unsqueeze(0).unsqueeze(0).int()

        return {
            'encoder_input': encoder_input,
            'decoder_input': decoder_input,
            'encoder_mask': encoder_mask,
            # Padding mask combined with the mask returned by casual_mask
            'decoder_mask': decoder_padding_mask & casual_mask(decoder_input.size(0)),
            'label': label,
            'src_text': src_text,
            'tgt_text': tgt_text
        }

# Predict

In [2]:
# Generates a target sequence by repeatedly appending the highest-scoring next token
def greedy_decode(model, source, source_mask, tokenizer_src, tokenizer_tgt, max_len, device):
    # Ids of the start and end markers in the target vocabulary
    sos_idx = tokenizer_tgt.token_to_id('[SOS]')
    eos_idx = tokenizer_tgt.token_to_id('[EOS]')

    # The source is encoded once; the result is reused at every decoding step
    encoder_output = model.encode(source, source_mask)

    # The decoder input starts out as a single [SOS] token
    decoder_input = torch.empty(1,1).fill_(sos_idx).type_as(source).to(device)

    # Keep generating until the sequence is max_len tokens long
    while decoder_input.size(1) != max_len:
        # Mask sized to the tokens generated so far
        decoder_mask = casual_mask(decoder_input.size(1)).type_as(source_mask).to(device)

        # Run the decoder over everything generated so far
        out = model.decode(encoder_output, source_mask, decoder_input, decoder_mask)

        # Project only the last position to score the next token
        prob = model.project(out[:, -1])

        # Pick the highest-scoring token and append it to the decoder input
        _, next_word = torch.max(prob, dim=1)
        next_token = torch.empty(1,1).type_as(source).fill_(next_word.item()).to(device)
        decoder_input = torch.cat([decoder_input, next_token], dim=1)

        # Stop as soon as the end marker has been produced
        if next_word == eos_idx:
            break

    return decoder_input.squeeze(0) # Generated token ids without the batch dimension

In [3]:
# Prints source, reference and greedy-decoded prediction for the first 'num_examples' validation batches
# num_examples defaults to 2
def run_validation(model, validation_ds, tokenizer_src, tokenizer_tgt, max_len, device, print_msg, global_state, writer, num_examples=2):
    model.eval() # Switch the model to evaluation mode

    console_width = 80 # Width of the separator line
    separator = '-'*console_width

    # No gradients are needed while evaluating
    with torch.no_grad():
        # 'count' is the 1-based number of batches seen so far
        for count, batch in enumerate(validation_ds, start=1):
            encoder_input = batch['encoder_input'].to(device)
            encoder_mask = batch['encoder_mask'].to(device)

            # Decoding below handles a single sentence at a time
            assert encoder_input.size(0) ==  1, 'Batch size must be 1 for validation.'

            # Greedy-decode the model's output for this source sentence
            model_out = greedy_decode(model, encoder_input, encoder_mask, tokenizer_src, tokenizer_tgt, max_len, device)

            source_text = batch['src_text'][0]
            target_text = batch['tgt_text'][0] # Reference translation
            # Turn the predicted ids back into text
            model_out_text = tokenizer_tgt.decode(model_out.detach().cpu().numpy())

            # Report the example
            print_msg(separator)
            print_msg(f'SOURCE: {source_text}')
            print_msg(f'TARGET: {target_text}')
            print_msg(f'PREDICTED: {model_out_text}')

            # Stop once the requested number of examples has been shown
            if count == num_examples:
                break

# Train

In [4]:
# Takes the config dictionary and the vocabulary sizes of the source and target languages
def get_model(config, vocab_src_len, vocab_tgt_len):
    # 'seq_len' from the config is used as the sequence length on both the source and target side
    seq_len = config['seq_len']

    # Everything not passed here keeps the default defined by 'build_transformer'
    return build_transformer(vocab_src_len, vocab_tgt_len, seq_len, seq_len, config['d_model'])

In [5]:
# Settings used to build and train the transformer model
def get_config():
    # A fresh dictionary is created on every call
    settings = dict(
        batch_size=256,
        num_epochs=100,
        lr=10**-4,
        seq_len=16,
        d_model=512, # Dimension of the embeddings. 512 like in the "Attention Is All You Need" paper.
        lang_src='en',
        lang_tgt='es',
        model_folder='weights',
        model_basename='tmodel_',
        preload=None,
        tokenizer_file='tokenizer_{0}.json',
        experiment_name='runs/tmodel',
    )
    return settings


# Builds the path of the weights file that belongs to a given epoch
def get_weights_file_path(config, epoch: str):
    # <current directory>/<model_folder>/<model_basename><epoch>.pt, returned as a string
    weights_path = Path('.', config['model_folder'], f"{config['model_basename']}{epoch}.pt")
    return str(weights_path)

In [6]:
def train_model(config):
    """Train the Transformer and run validation at the end of every epoch.

    Args:
        config: Settings dictionary. Keys read directly here: 'model_folder',
            'experiment_name', 'lr', 'preload', 'num_epochs' and 'seq_len'
            (the helpers called from here read further keys). The optional
            key 'save_checkpoints' (default False) turns on writing a
            checkpoint after every epoch; when it is absent nothing is
            written, which matches the previous behaviour.

    Returns:
        None. Progress is printed, and the training loss is logged to
        TensorBoard under 'train loss'.
    """
    # Setting up device to run on GPU to train faster
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device {device}")

    # Creating model directory to store weights
    Path(config['model_folder']).mkdir(parents=True, exist_ok=True)

    # Retrieving dataloaders and tokenizers for source and target languages
    train_dataloader, val_dataloader, tokenizer_src, tokenizer_tgt = get_ds(config)

    # Looked up once; it is reused for every batch when flattening the logits
    tgt_vocab_size = tokenizer_tgt.get_vocab_size()

    # Initializing model on the selected device
    model = get_model(config, tokenizer_src.get_vocab_size(), tgt_vocab_size).to(device)

    # Adam optimizer with the learning rate from the config plus an epsilon value
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'], eps=1e-9)

    # Initializing epoch and global step variables
    initial_epoch = 0
    global_step = 0

    # Resuming from a saved checkpoint when one is requested
    if config['preload']:
        model_filename = get_weights_file_path(config, config['preload'])
        print(f'Preloading model {model_filename}')
        # map_location lets a checkpoint written on GPU be restored on a CPU-only machine
        state = torch.load(model_filename, map_location=device)

        # Restoring the trained weights; without this the run would resume the
        # optimizer and step counter on a freshly initialised model
        model.load_state_dict(state['model_state_dict'])
        # Resume from the epoch after the saved one
        initial_epoch = state['epoch'] + 1
        optimizer.load_state_dict(state['optimizer_state_dict'])
        global_step = state['global_step']

    # Padding positions are ignored in the loss. The labels index the target
    # vocabulary, so the padding id is taken from the target tokenizer.
    # Label smoothing is applied to reduce overfitting.
    loss_fn = nn.CrossEntropyLoss(
        ignore_index=tokenizer_tgt.token_to_id('[PAD]'),
        label_smoothing=0.1,
    ).to(device)

    # Tensorboard
    writer = SummaryWriter(config['experiment_name'])

    try:
        # Iterating from 'initial_epoch' up to the number of epochs in the config
        for epoch in range(initial_epoch, config['num_epochs']):
            # Re-enter training mode each epoch, in case validation switched
            # the model to evaluation mode
            model.train()

            # tqdm displays a progress bar over the training batches
            batch_iterator = tqdm(train_dataloader, desc=f'Processing epoch {epoch:02d}')

            for batch in batch_iterator:
                # Loading input data and masks onto the device
                encoder_input = batch['encoder_input'].to(device)
                decoder_input = batch['decoder_input'].to(device)
                encoder_mask = batch['encoder_mask'].to(device)
                decoder_mask = batch['decoder_mask'].to(device)

                # Running tensors through the Transformer
                encoder_output = model.encode(encoder_input, encoder_mask)
                decoder_output = model.decode(encoder_output, encoder_mask, decoder_input, decoder_mask)
                proj_output = model.project(decoder_output)

                # Loading the target labels onto the device
                label = batch['label'].to(device)

                # Flattening logits and labels so each position is one classification
                loss = loss_fn(proj_output.view(-1, tgt_vocab_size), label.view(-1))

                # Updating progress bar
                batch_iterator.set_postfix({"loss": f"{loss.item():6.3f}"})

                writer.add_scalar('train loss', loss.item(), global_step)
                writer.flush()

                # Backpropagation, parameter update, then clearing gradients
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

                global_step += 1

            # Evaluating the model at the end of each epoch; messages go through
            # the progress bar's write method so they do not break the bar
            run_validation(model, val_dataloader, tokenizer_src, tokenizer_tgt, config['seq_len'], device, batch_iterator.write, global_step, writer)

            # Checkpointing is opt-in, so existing configs keep writing nothing
            if config.get('save_checkpoints', False):
                model_filename = get_weights_file_path(config, f'{epoch:02d}')
                torch.save({
                    'epoch': epoch,  # Current epoch
                    'model_state_dict': model.state_dict(),  # Current model state
                    'optimizer_state_dict': optimizer.state_dict(),  # Current optimizer state
                    'global_step': global_step,  # Current global step
                }, model_filename)
    finally:
        # Closing the writer even when training is interrupted
        writer.close()

In [None]:
# Silence all warnings so they do not interleave with the training output
warnings.filterwarnings(action='ignore')

# Build the settings dictionary, then start training with it
config = get_config()
train_model(config)

Using device cuda
Max length of source sentence: 767
Max length of target sentence: 782


Processing epoch 00: 100%|██████████| 103/103 [00:27<00:00,  3.77it/s, loss=6.533]


--------------------------------------------------------------------------------
SOURCE: All day, just like the peasants?'
TARGET: ¿Igual que ellos? ¿Todo el día?
PREDICTED: ¿ , , , , , , , , .
--------------------------------------------------------------------------------
SOURCE: "Never mind him.
TARGET: ––No se preocupe por él.
PREDICTED: ¿ , , , , , .


Processing epoch 01: 100%|██████████| 103/103 [00:27<00:00,  3.71it/s, loss=5.981]


--------------------------------------------------------------------------------
SOURCE: I will answer for Ayrton's fidelity."
TARGET: Yo respondo de la fidelidad de Ayrton.
PREDICTED: ¡ No , !
--------------------------------------------------------------------------------
SOURCE: No! a duel is unthinkable and no one expects it of me.
TARGET: El duelo es inadmisible y nadie espere que yo lo provoque.
PREDICTED: ¡ No , la la la la la la la la la la la .


Processing epoch 02: 100%|██████████| 103/103 [00:27<00:00,  3.74it/s, loss=5.695]


--------------------------------------------------------------------------------
SOURCE: 'Well, I'll come with you. May I?'
TARGET: –¿Puedo acompañarte?
PREDICTED: ¿ Qué qué ? – preguntó el .
--------------------------------------------------------------------------------
SOURCE: I held the chronometer.
TARGET: Yo tenía el cronómetro.
PREDICTED: ¡ No , no !


Processing epoch 03: 100%|██████████| 103/103 [00:27<00:00,  3.68it/s, loss=5.436]


--------------------------------------------------------------------------------
SOURCE: "What!
TARGET: »-¿Cómo?
PREDICTED: ¡ Qué !
--------------------------------------------------------------------------------
SOURCE: "No."
TARGET: No respondió Gualterio Ralph.
PREDICTED: - No .


Processing epoch 04: 100%|██████████| 103/103 [00:28<00:00,  3.59it/s, loss=5.097]


--------------------------------------------------------------------------------
SOURCE: Everybody knows her and Aline Stahl.'
TARGET: A ella y a Alina Stal todos los conocen.
PREDICTED: El y se .
--------------------------------------------------------------------------------
SOURCE: For what purpose was this meeting?
TARGET: ¿Y por qué aquel mitin?
PREDICTED: ¿ Qué ha sido a la hombre ?


Processing epoch 05: 100%|██████████| 103/103 [00:28<00:00,  3.57it/s, loss=4.746]


--------------------------------------------------------------------------------
SOURCE: It pains me to be misjudged by so good a woman."
TARGET: Me disgusta que una mujer tan bondadosa como ella me juzgue mal.
PREDICTED: ¡ a a que me !
--------------------------------------------------------------------------------
SOURCE: "Bogs make queer noises sometimes.
TARGET: -Las ciénagas hacen a veces ruidos extraños.
PREDICTED: - La hombre de la hombre de la .


Processing epoch 06: 100%|██████████| 103/103 [00:28<00:00,  3.66it/s, loss=4.743]


--------------------------------------------------------------------------------
SOURCE: "And with my own!" the harpooner replied simply.
TARGET: -Y la mía -respondió el arponero, con la mayor simplicidad.
PREDICTED: - Y , a mi capitán Nemo - dijo el capitán Nemo .
--------------------------------------------------------------------------------
SOURCE: But the ship didn't stay long in these heavily traveled waterways.
TARGET: Pero no permaneció por mucho tiempo en esos parajes tan frecuentados.
PREDICTED: Pero no había sido una vez más que no había sido de .


Processing epoch 07: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=4.578]


--------------------------------------------------------------------------------
SOURCE: But Katavasov liked the third, an artilleryman, very much.
TARGET: En cambio el artillero despertó la simpatía de Katavasov.
PREDICTED: Pero el capitán Nemo se había sido un momento , pero se había sido .
--------------------------------------------------------------------------------
SOURCE: There is not enough for three."
TARGET: Es poco para tres...
PREDICTED: No me .


Processing epoch 08: 100%|██████████| 103/103 [00:28<00:00,  3.59it/s, loss=4.594]


--------------------------------------------------------------------------------
SOURCE: "Certainly, Conseil.
TARGET: -Claro que sí, Conseil.
PREDICTED: - Sí , señor Holmes .
--------------------------------------------------------------------------------
SOURCE: Why are you so glum?'
TARGET: Pero ¿qué te pasa? ¿Estás triste?
PREDICTED: ¿ Por qué ?


Processing epoch 09: 100%|██████████| 103/103 [00:30<00:00,  3.40it/s, loss=4.378]


--------------------------------------------------------------------------------
SOURCE: "Offended me!
TARGET: -¡Ofenderme!
PREDICTED: ¡ !
--------------------------------------------------------------------------------
SOURCE: All within Elinor's breast was satisfaction, silent and strong.
TARGET: Todo lo que abrigaba el pecho de Elinor era satisfacción, callada y fuerte.
PREDICTED: El capitán Nemo se había sido en la cabeza y se levantó .


Processing epoch 10: 100%|██████████| 103/103 [00:30<00:00,  3.36it/s, loss=4.203]


--------------------------------------------------------------------------------
SOURCE: "Here's a remarkable book!
TARGET: —¡Qué libro!
PREDICTED: Es un hombre que es un hombre muy muy muy muy muy muy .
--------------------------------------------------------------------------------
SOURCE: What will become of me?'
TARGET: ¿Qué va a ser de mí?
PREDICTED: ¿ Qué me ha sido ?


Processing epoch 11: 100%|██████████| 103/103 [00:28<00:00,  3.55it/s, loss=4.110]


--------------------------------------------------------------------------------
SOURCE: "A floating lighthouse," said someone next to me.
TARGET: -Un faro flotante -dijo alguien cerca de mí.
PREDICTED: El hombre se ha sido en seguida .
--------------------------------------------------------------------------------
SOURCE: He soon extricated himself from their grasp.
TARGET: El vigoroso Ayrton se desembarazó de ellos.
PREDICTED: Me parece que se en su casa .


Processing epoch 12: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=3.949]


--------------------------------------------------------------------------------
SOURCE: But I could not remain alone for long.
TARGET: Sin embargo, no podía permanecer más de este tiempo solo.
PREDICTED: Pero no podía ser más más más más más que mucho tiempo .
--------------------------------------------------------------------------------
SOURCE: But what do _you_ think?"
TARGET: Digo si le gustan a usted.
PREDICTED: Pero , ¿ qué te ?


Processing epoch 13: 100%|██████████| 103/103 [00:27<00:00,  3.70it/s, loss=3.806]


--------------------------------------------------------------------------------
SOURCE: What a struggle!
TARGET: ¡Qué lucha!
PREDICTED: ¡ Qué espectáculo !
--------------------------------------------------------------------------------
SOURCE: 'But how will schools help?'
TARGET: –¿De qué pueden servir las escuelas?
PREDICTED: – Y ahora se trata de la isla ...


Processing epoch 14: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=3.763]


--------------------------------------------------------------------------------
SOURCE: "Try them, Pencroft," replied the engineer.
TARGET: –Probemos, Pencroff –dijo el ingeniero–.
PREDICTED: – , Pencroff – contestó el ingeniero .
--------------------------------------------------------------------------------
SOURCE: I have brought my other son and daughter to see you.
TARGET: He traído a mi otro hijo e hija para que se conozcan.
PREDICTED: He visto mi vida y a mi amo .


Processing epoch 15: 100%|██████████| 103/103 [00:28<00:00,  3.66it/s, loss=3.674]


--------------------------------------------------------------------------------
SOURCE: "Trapped!
TARGET: -¡Atrapados!
PREDICTED: -¡ !
--------------------------------------------------------------------------------
SOURCE: 'No, I will go through the garden.'
TARGET: –No, pasaré por el jardín.
PREDICTED: – No , voy a bordo del Palacio de granito .


Processing epoch 16: 100%|██████████| 103/103 [00:28<00:00,  3.67it/s, loss=3.581]


--------------------------------------------------------------------------------
SOURCE: "And an exceedingly interesting case it appears to be.
TARGET: ––Y parece tratarse de un caso sumamente interesante.
PREDICTED: - Y una idea parece que parece que parece .
--------------------------------------------------------------------------------
SOURCE: And the most passionate and impossible romances occurred to Dolly's fancy.
TARGET: Y las aventuras mis pasionales a irrealizables se presentaron a su imaginación.
PREDICTED: Y la más más que también sus caballos , ¿ no te ha venido


Processing epoch 17: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=3.454]


--------------------------------------------------------------------------------
SOURCE: This was perfectly true, but we had nearly forgotten the fact.
TARGET: Lo que era totalmente cierto, aunque casi lo hubiéramos olvidado.
PREDICTED: Era evidente que no había sido más ; pero el día había sido .
--------------------------------------------------------------------------------
SOURCE: 'No, I did not say so.
TARGET: –No, no lo he dicho...
PREDICTED: – No , no lo sé .


Processing epoch 18: 100%|██████████| 103/103 [00:28<00:00,  3.66it/s, loss=3.331]


--------------------------------------------------------------------------------
SOURCE: And why, why does the Partition of Poland interest him?'
TARGET: ¿En qué puede interesarle la división de Polonia?».
PREDICTED: Y , ¿ por qué es el asunto de él ?
--------------------------------------------------------------------------------
SOURCE: "How yes and no?"
TARGET: ¿Cómo sí y no?
PREDICTED: ––¿ Cómo , no ?


Processing epoch 19: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=3.291]


--------------------------------------------------------------------------------
SOURCE: "This is indeed important," said he.
TARGET: ––¡Esto sí que es importante! ––dijo.
PREDICTED: –– Eso es muy bien –– dijo ––.
--------------------------------------------------------------------------------
SOURCE: It is surely Cyclopides."
TARGET: Es sin duda un ejemplar de Cyclopides.
PREDICTED: ¡ Es una !


Processing epoch 20: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=3.143]


--------------------------------------------------------------------------------
SOURCE: 'Love, probably!
TARGET: Seguramente su alegría tendrá por causa el amor.
PREDICTED: – Pues bien , sí ...
--------------------------------------------------------------------------------
SOURCE: "I do."
TARGET: -Sí.
PREDICTED: - Sí .


Processing epoch 21: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=3.131]


--------------------------------------------------------------------------------
SOURCE: May thy joys be all dreams,
TARGET: en sueños tus pasatiempos,
PREDICTED: Si tú te ,
--------------------------------------------------------------------------------
SOURCE: We must cast round for another scent."
TARGET: Hemos de seguir buscando.
PREDICTED: No había más que un paso de hielo .


Processing epoch 22: 100%|██████████| 103/103 [00:28<00:00,  3.65it/s, loss=3.071]


--------------------------------------------------------------------------------
SOURCE: "They repent!" exclaimed the sailor, shrugging his shoulders.
TARGET: –¡Arrepentirse! –exclamó el marino encogiéndose de hombros.
PREDICTED: –¡ ! – exclamó el marino , encogiéndose de hombros .
--------------------------------------------------------------------------------
SOURCE: 'She says that?' he cried.
TARGET: –¡Conque dice eso! –exclamó–.
PREDICTED: –¡ Qué dice ! – exclamó .


Processing epoch 23: 100%|██████████| 103/103 [00:28<00:00,  3.68it/s, loss=2.948]


--------------------------------------------------------------------------------
SOURCE: It was only too true.
TARGET: Era cierto.
PREDICTED: No era demasiado cosa .
--------------------------------------------------------------------------------
SOURCE: "And pray why do you put your admiral to death?"
TARGET: --¿Y porqué han muerto á ese almirante?
PREDICTED: Y ahora , ¡ qué la muerte !


Processing epoch 24: 100%|██████████| 103/103 [00:28<00:00,  3.65it/s, loss=2.920]


--------------------------------------------------------------------------------
SOURCE: See how he is throwing out air and water through his blowers."
TARGET: ¡Mira el aire y el agua que arroja por las narices!
PREDICTED: ¿ Cómo sabe usted que el Sherlock Holmes se llama el agua ?
--------------------------------------------------------------------------------
SOURCE: I don't act and I worry.
TARGET: Yo, al no hacer nada, me atormento.
PREDICTED: No me pregunto si me .


Processing epoch 25: 100%|██████████| 103/103 [00:28<00:00,  3.60it/s, loss=2.859]


--------------------------------------------------------------------------------
SOURCE: "Cruel, cruel deserter!
TARGET: -¡Qué cruel fuiste, Jane!
PREDICTED: -¡ Cruel , !
--------------------------------------------------------------------------------
SOURCE: "But what hour is it, then?" the Canadian asked.
TARGET: -Pero ¿qué hora es? -preguntó el canadiense.
PREDICTED: - Pero , ¿ qué es eso , el canadiense ?


Processing epoch 26: 100%|██████████| 103/103 [00:28<00:00,  3.66it/s, loss=2.763]


--------------------------------------------------------------------------------
SOURCE: "Still, jealousy is a strange transformer of characters.
TARGET: ––No obstante, los celos pueden provocar extraños cambios en el carácter.
PREDICTED: Sin embargo , hay celos , su situación está muy satisfecho .
--------------------------------------------------------------------------------
SOURCE: Anyhow, he never got tallow-stains from a gas-jet.
TARGET: En cualquier caso, un aplique de gas no produce manchas de sebo.
PREDICTED: Más que no nos da un asunto para el .


Processing epoch 27: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=2.674]


--------------------------------------------------------------------------------
SOURCE: "We will make bellows of them!"
TARGET: Haremos de ellas fuelles de fragua.
PREDICTED: ¡ Nos !
--------------------------------------------------------------------------------
SOURCE: "Ah! It is you, Treville.
TARGET: ¡Ay sois vos, Tréville!
PREDICTED: ¡ Ah ! ¡ Sois vos , Tréville !


Processing epoch 28: 100%|██████████| 103/103 [00:28<00:00,  3.67it/s, loss=2.613]


--------------------------------------------------------------------------------
SOURCE: "’Judge not rashly’, says the Gospel," replied the cardinal.
TARGET: ¡No juzguéis temerariamente!, dice el Evangelio replicó el cardenal.
PREDICTED: ¡ no debe ser la razón ! dijo el cardenal .
--------------------------------------------------------------------------------
SOURCE: Had he said too much?
TARGET: ¿Habría hablado demasiado?
PREDICTED: ¿ Había comprendido así ?


Processing epoch 29: 100%|██████████| 103/103 [00:28<00:00,  3.67it/s, loss=2.569]


--------------------------------------------------------------------------------
SOURCE: What do you deduce from that?"
TARGET: ¿Qué deduce usted de eso?
PREDICTED: ¿ Qué sabéis lo de eso ?
--------------------------------------------------------------------------------
SOURCE: Was there a ship at our disposal in some underground harbour?
TARGET: ¿Había fondeado un buque en algún puerto interior?
PREDICTED: ¿ Había algún buque a su viaje sobre sus papeles ?


Processing epoch 30: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=2.503]


--------------------------------------------------------------------------------
SOURCE: If the current was interrupted, the magnet immediately became unmagnetized.
TARGET: Si la corriente se interrumpía, el electroimán se desimantaba inmediatamente.
PREDICTED: Si la corriente , Ned , se hallaba profundamente delante de él .
--------------------------------------------------------------------------------
SOURCE: His finger pulled the trigger before he had taken aim.
TARGET: Su dedo oprimía el gatillo antes de apuntar bien.
PREDICTED: El dedo sacó una seña del cadáver .


Processing epoch 31: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=2.390]


--------------------------------------------------------------------------------
SOURCE: Papa says I mustn't.
TARGET: ¿Has visto a Basilio Lukich?
PREDICTED: papá me parece que no .
--------------------------------------------------------------------------------
SOURCE: Our General stood in need of new recruits of young German Jesuits.
TARGET: El padre general necesitaba una leva de jesuitas alemanes mozos.
PREDICTED: El vapor continuaba cerca de esa nueva Ferrars y bajo el impasible .


Processing epoch 32: 100%|██████████| 103/103 [00:28<00:00,  3.67it/s, loss=2.389]


--------------------------------------------------------------------------------
SOURCE: CHAPTER XXVI
TARGET: Capítulo XXVI
PREDICTED: XXVI
--------------------------------------------------------------------------------
SOURCE: That day the usual work was accomplished with even greater energy.
TARGET: Aquel día se realizó con más vigor aún el trabajo habitual.
PREDICTED: Sin día la día se también con el trabajo fría ; me parecía .


Processing epoch 33: 100%|██████████| 103/103 [00:27<00:00,  3.70it/s, loss=2.294]


--------------------------------------------------------------------------------
SOURCE: "No. His orders were to stay in the house."
TARGET: ––No, sus órdenes son permanecer en la casa.
PREDICTED: - No , su órdenes cuando esperaba a la casa .
--------------------------------------------------------------------------------
SOURCE: Was it Ayrton?
TARGET: ¿Era Ayrton?
PREDICTED: ¿ Era Ayrton ?


Processing epoch 34: 100%|██████████| 103/103 [00:28<00:00,  3.66it/s, loss=2.226]


--------------------------------------------------------------------------------
SOURCE: "Me! why to me?"
TARGET: ¿A mí? ¿Y eso por qué?
PREDICTED: ¿ A mí ? ¿ A qué me ?
--------------------------------------------------------------------------------
SOURCE: "It is a wound that confines him to his bed?"
TARGET: Entonces, ¿es una estocada lo que le retiene en su cama?
PREDICTED: ¿ Es un herida que le haya venido a la cama ?


Processing epoch 35: 100%|██████████| 103/103 [00:28<00:00,  3.65it/s, loss=2.226]


--------------------------------------------------------------------------------
SOURCE: "Mr. Conseil put one over on me!"
TARGET: El señor Conseil me estaba tomando el pelo.
PREDICTED: -¡ Conseil ! - exclamó Conseil .
--------------------------------------------------------------------------------
SOURCE: His whole being was concentrated in this last word.
TARGET: En esa frase estaba expresado todo el arponero.
PREDICTED: Toda su situación estaba concentrada en el último .


Processing epoch 36: 100%|██████████| 103/103 [00:28<00:00,  3.66it/s, loss=2.186]


--------------------------------------------------------------------------------
SOURCE: IN WHICH THE CAPTIVE STILL CONTINUES HIS ADVENTURES
TARGET: Donde todavía prosigue el cautivo su suceso
PREDICTED: Donde el cautivo las maravillas del cautivo
--------------------------------------------------------------------------------
SOURCE: "I will not give him that trouble," I answered.
TARGET: -No tiene por qué molestarse tanto -dije-.
PREDICTED: –– No me da cuenta de esto –– respondí ––.


Processing epoch 37: 100%|██████████| 103/103 [00:27<00:00,  3.72it/s, loss=2.135]


--------------------------------------------------------------------------------
SOURCE: "No--stop!" interrupted Colonel Dent.
TARGET: -¡No! -interrumpió el coronel Dent-.
PREDICTED: - No , no - respondió el coronel Dent -.
--------------------------------------------------------------------------------
SOURCE: Fix, seated in the bow, gave himself up to meditation.
TARGET: Fix estaba meditabundo en la proa.
PREDICTED: Fix , pues , bajó la campanilla y me dirigió a una especie .


Processing epoch 38: 100%|██████████| 103/103 [00:28<00:00,  3.67it/s, loss=2.115]


--------------------------------------------------------------------------------
SOURCE: However, do as you please!'
TARGET: Sin embargo, haz lo que te parezca mejor.
PREDICTED: En fin ; como al fin te lo ruego .
--------------------------------------------------------------------------------
SOURCE: He positively avoids me."
TARGET: Parece evitarme.
PREDICTED: Pronto tomó mi camarote .


Processing epoch 39: 100%|██████████| 103/103 [00:28<00:00,  3.66it/s, loss=2.066]


--------------------------------------------------------------------------------
SOURCE: "And you too, Mr. Spilett, you will eat some!"
TARGET: –Con mucho gusto –dijo el corresponsal–.
PREDICTED: – Y tú , señor Spilett , con nosotros .
--------------------------------------------------------------------------------
SOURCE: 'I ought to have done that long ago.'
TARGET: ¡Ya podíamos haberlo hecho hace tiempo!
PREDICTED: Hace tiempo que me he pasado ya .


Processing epoch 40: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=2.043]


--------------------------------------------------------------------------------
SOURCE: Vronsky remained silent.
TARGET: Vronsky callaba.
PREDICTED: Vronsky callaba .
--------------------------------------------------------------------------------
SOURCE: "Certainly, Neb," answered Cyrus Harding.
TARGET: –Sí, Nab –repuso Ciro Smith.
PREDICTED: – Sí , señor – contestó Ciro Smith –.


Processing epoch 41: 100%|██████████| 103/103 [00:29<00:00,  3.53it/s, loss=1.978]


--------------------------------------------------------------------------------
SOURCE: M Bonacieux was at his door.
TARGET: El señor Bonacieux estaba a su puerta.
PREDICTED: El señor Bonacieux estaba en la puerta .
--------------------------------------------------------------------------------
SOURCE: The man replied that the Count had gone to the stables.
TARGET: El criado contestó que el Conde se dirigía a las cuadras
PREDICTED: El hombre respondió que el señor conde iban juntos .


Processing epoch 42: 100%|██████████| 103/103 [00:30<00:00,  3.41it/s, loss=1.883]


--------------------------------------------------------------------------------
SOURCE: "And who has abandoned you--is that it?"
TARGET: Y que os ha abandonado, ¿no es eso?
PREDICTED: -¿ Y quién tiene usted dinero ?
--------------------------------------------------------------------------------
SOURCE: I have brought my other son and daughter to see you.
TARGET: He traído a mi otro hijo e hija para que se conozcan.
PREDICTED: He traído mi hijo y a mi hijo .


Processing epoch 43: 100%|██████████| 103/103 [00:30<00:00,  3.37it/s, loss=1.894]


--------------------------------------------------------------------------------
SOURCE: It was worth while going a little hungry.
TARGET: Vale la pena quedarse sin comer.
PREDICTED: Su dolor pasaba un poco gigantesco .
--------------------------------------------------------------------------------
SOURCE: — Oui, je sais ce que vous avez vu.
TARGET: ––Sí, sé ya todo lo que usted vio.
PREDICTED: –– Le aseguro que no es verdad .


Processing epoch 44: 100%|██████████| 103/103 [00:30<00:00,  3.39it/s, loss=1.864]


--------------------------------------------------------------------------------
SOURCE: "Take away the prisoner," said the commissary to the two guards.
TARGET: Llevaos al prisionero dijo el comisario a los dos guardias.
PREDICTED: Tomad a vuestra prisionera dijo el comisario al comisario .
--------------------------------------------------------------------------------
SOURCE: Milady smiled.
TARGET: Milady sonrió.
PREDICTED: Milady sonrió .


Processing epoch 45: 100%|██████████| 103/103 [00:30<00:00,  3.40it/s, loss=1.857]


--------------------------------------------------------------------------------
SOURCE: Let us be off to Tver.
TARGET: La ventana está abierta.
PREDICTED: Hemos ido con la suficiente .
--------------------------------------------------------------------------------
SOURCE: I am simply unhappy.
TARGET: Soy muy desgraciada.
PREDICTED: Estoy verdaderamente desgraciada .


Processing epoch 46: 100%|██████████| 103/103 [00:29<00:00,  3.47it/s, loss=1.801]


--------------------------------------------------------------------------------
SOURCE: You are killing me now."
TARGET: Ya me estás matando ahora.
PREDICTED: Aquí estás aquí .
--------------------------------------------------------------------------------
SOURCE: Not exactly. Your witch's skill is rather at fault sometimes."
TARGET: No es eso precisamente.
PREDICTED: Ni más claramente el almirante de visitar a las opiniones de su doble manera .


Processing epoch 47: 100%|██████████| 103/103 [00:28<00:00,  3.67it/s, loss=1.816]


--------------------------------------------------------------------------------
SOURCE: How clearly he puts everything!'
TARGET: Lo ve todo con una claridad...
PREDICTED: ¡ Qué delgada y qué se lo pone !
--------------------------------------------------------------------------------
SOURCE: "You are hungry," I remarked.
TARGET: ––Viene usted hambriento ––comenté.
PREDICTED: - Usted es - dije .


Processing epoch 48: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=1.752]


--------------------------------------------------------------------------------
SOURCE: It was forty-eight hours since I had taken any nourishment.
TARGET: Cuando me desperté, una nueva mesa estaba servida.
PREDICTED: Era cuarenta y ocho horas que no hubieran hecho más o cinco años .
--------------------------------------------------------------------------------
SOURCE: He entered the chamber and closed the door behind him.
TARGET: Entró en la habitación y cerró la puerta tras sí.
PREDICTED: Entró en la habitación y la puerta salió de la puerta .


Processing epoch 49: 100%|██████████| 103/103 [00:28<00:00,  3.55it/s, loss=1.745]


--------------------------------------------------------------------------------
SOURCE: Levin felt guilty but could do nothing.
TARGET: Levin, sin poderlo remediar, se sentía culpable.
PREDICTED: Levin sentía culpable y no podía hacer nada .
--------------------------------------------------------------------------------
SOURCE: CHAPTER XVIII
TARGET: Capítulo XVIII
PREDICTED: XVIII


Processing epoch 50: 100%|██████████| 103/103 [00:30<00:00,  3.39it/s, loss=1.725]


--------------------------------------------------------------------------------
SOURCE: 'Kitty, don't be angry!
TARGET: –Kitty, no te enfades.
PREDICTED: –¿ Cómo , Kitty ?
--------------------------------------------------------------------------------
SOURCE: 'What's the matter?'
TARGET: –¿Qué te pasa?
PREDICTED: –¿ Qué te pasa ?


Processing epoch 51: 100%|██████████| 103/103 [00:29<00:00,  3.50it/s, loss=1.700]


--------------------------------------------------------------------------------
SOURCE: I am simply unhappy.
TARGET: Soy muy desgraciada.
PREDICTED: Estoy incapaz .
--------------------------------------------------------------------------------
SOURCE: What will he say when he returns?"
TARGET: ¿Qué dirá cuando vuelva?
PREDICTED: ¿ Qué opina usted ?


Processing epoch 52: 100%|██████████| 103/103 [00:28<00:00,  3.66it/s, loss=1.699]


--------------------------------------------------------------------------------
SOURCE: "What do you mean, then?"
TARGET: -Entonces, ¿a qué se refiere?
PREDICTED: ––¿ Qué quiere usted decir ?
--------------------------------------------------------------------------------
SOURCE: "No!" replies my uncle.
TARGET: —¡No! —responde mi tío.
PREDICTED: — No , mi tío .


Processing epoch 53: 100%|██████████| 103/103 [00:28<00:00,  3.68it/s, loss=1.711]


--------------------------------------------------------------------------------
SOURCE: "How!" said Bonacieux, astonished.
TARGET: ¿Cómo? dijo Bonacieux, extrañado.
PREDICTED: ¿ Cómo ? dijo Bonacieux asombrado .
--------------------------------------------------------------------------------
SOURCE: He liked Katavasov because of his clear and simple outlook on life.
TARGET: Katavasov le atraía por la claridad y sencillez de sus ideas.
PREDICTED: Se acercó a Katavasov y tranquila , y tranquila ; pero es excelente vida .


Processing epoch 54: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=1.697]


--------------------------------------------------------------------------------
SOURCE: "I have heard of you, Mr. Holmes.
TARGET: ––He oído hablar de usted, señor Holmes.
PREDICTED: - Lo he oído , señor Holmes .
--------------------------------------------------------------------------------
SOURCE: "You are very good.
TARGET: -Es usted muy amable.
PREDICTED: ––¿ Cómo está usted bien ?


Processing epoch 55: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=1.657]


--------------------------------------------------------------------------------
SOURCE: "Yes, sir."
TARGET: -Sí.
PREDICTED: - Sí , señor .
--------------------------------------------------------------------------------
SOURCE: Jane Eyre
TARGET: Jane Eyre
PREDICTED: Jane ...


Processing epoch 56: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=1.649]


--------------------------------------------------------------------------------
SOURCE: How the theories will hinder us, won't they?"
TARGET: ¡Cuánto van a darnos que hacer!
PREDICTED: ¿ Cómo los gatos son estos gatos ?
--------------------------------------------------------------------------------
SOURCE: 'Es ist ein ganz einfaches Ding,' [Oh, yes!
TARGET: Es ist ein ganz einfaches Ding.
PREDICTED: – Es preciso – añadió Vronsky .


Processing epoch 57: 100%|██████████| 103/103 [00:27<00:00,  3.69it/s, loss=1.634]


--------------------------------------------------------------------------------
SOURCE: One part of Captain Nemo's secret life had been unveiled.
TARGET: Eso desvelaba una parte de la misteriosa existencia del capitán Nemo.
PREDICTED: Un poco de tierra , el capitán Nemo había sido alguna .
--------------------------------------------------------------------------------
SOURCE: "Round the world?" cried Fix.
TARGET: ¿La vuelta al mundo? Exclamó Fix.
PREDICTED: ¿ La vuelta al mundo ? dijo Fix .


Processing epoch 58: 100%|██████████| 103/103 [00:27<00:00,  3.68it/s, loss=1.627]


--------------------------------------------------------------------------------
SOURCE: The company rose to go into the garden.
TARGET: Los invitados se levantaron en aquel momento para salir al jardín.
PREDICTED: El choque tomó bastante al jardín .
--------------------------------------------------------------------------------
SOURCE: "Kindly follow this man."
TARGET: Tengan la amabilidad de seguir a este hombre.
PREDICTED: - Entonces , esta persona .


Processing epoch 59: 100%|██████████| 103/103 [00:28<00:00,  3.60it/s, loss=1.605]


--------------------------------------------------------------------------------
SOURCE: In your place I would stake the furniture against the horse."
TARGET: En vuestro lugar, yo jugaría vuestros arneses contra vuestro caballo.
PREDICTED: En vuestro sitio les sigo roto el caballo .
--------------------------------------------------------------------------------
SOURCE: 'Why don't you like her husband?
TARGET: –¿Y por qué a su marido no?
PREDICTED: –¿ Cómo no te gusta su marido ?


Processing epoch 60: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=1.625]


--------------------------------------------------------------------------------
SOURCE: "As far as the centre of the earth, Axel."
TARGET: —Sí, hasta el centro de la tierra.
PREDICTED: — Tan sólo el centro de la tierra .
--------------------------------------------------------------------------------
SOURCE: His finger pulled the trigger before he had taken aim.
TARGET: Su dedo oprimía el gatillo antes de apuntar bien.
PREDICTED: Sus manos se tras haber marchado como el horizonte .


Processing epoch 61: 100%|██████████| 103/103 [00:27<00:00,  3.69it/s, loss=1.590]


--------------------------------------------------------------------------------
SOURCE: I inquired soon if he had not been to London.
TARGET: Le pregunté si había estado en Londres.
PREDICTED: Pregunté por ti si no se hubiera dicho nada .
--------------------------------------------------------------------------------
SOURCE: "Yes, when the house where he fraternizes is suspected."
TARGET: Sí, cuando la casa en la que confraterniza con ese amigo es sospechosa.
PREDICTED: - Sí , cuando el resto de la casa .


Processing epoch 62: 100%|██████████| 103/103 [00:27<00:00,  3.69it/s, loss=1.608]


--------------------------------------------------------------------------------
SOURCE: Why _did_ Mr. Rochester enforce this concealment?
TARGET: ¿Por qué Rochester toleraba aquello?
PREDICTED: ¿ Y qué hizo entonces los turcos ?
--------------------------------------------------------------------------------
SOURCE: 'He is pitiable, he is overwhelmed with remorse...'
TARGET: Es digno también de compasión; el arrepentimiento le tiene abatido.
PREDICTED: Se trata de un paseo como cerrado por remordimientos .


Processing epoch 63: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=1.580]


--------------------------------------------------------------------------------
SOURCE: "Drink and relate, then."
TARGET: Bebed y contad.
PREDICTED: - Bebe y maquinalmente .
--------------------------------------------------------------------------------
SOURCE: The atmosphere was turning white and milky.
TARGET: La atmósfera estaba blanca, lechosa.
PREDICTED: Hacía una escena que salieron bajo los planes y la capa de espesor .


Processing epoch 64: 100%|██████████| 103/103 [00:28<00:00,  3.65it/s, loss=1.579]


--------------------------------------------------------------------------------
SOURCE: 'But what was there in church on Sunday?
TARGET: –¿Qué pasó el domingo en la iglesia? –preguntó el Príncipe–.
PREDICTED: – Pero ¿ qué era en una iglesia ?
--------------------------------------------------------------------------------
SOURCE: Was our presence on board perhaps a burden to him?
TARGET: ¿Tal vez se le hacía insoportable nuestra presencia a bordo?
PREDICTED: ¿ Era nuestra presencia un detalle como yo ?


Processing epoch 65: 100%|██████████| 103/103 [00:28<00:00,  3.68it/s, loss=1.558]


--------------------------------------------------------------------------------
SOURCE: Everybody knew your errand."
TARGET: Todos lo saben.
PREDICTED: Todos lo demás se que es capaz de enamorarse .
--------------------------------------------------------------------------------
SOURCE: I didn't tell my two companions about this new danger.
TARGET: Me abstuve de comunicar este nuevo peligro a mis dos compañeros.
PREDICTED: No pude responderle entre mis dos compañeros es éste .


Processing epoch 66: 100%|██████████| 103/103 [00:27<00:00,  3.68it/s, loss=1.573]


--------------------------------------------------------------------------------
SOURCE: "And you said nothing?"
TARGET: -¿Y no dijo nada?
PREDICTED: ¿ Y no habéis dicho nada ?
--------------------------------------------------------------------------------
SOURCE: During the last week of the month of August the weather moderated again.
TARGET: Durante la última semana de aquel mes de agosto el tiempo cambió de nuevo.
PREDICTED: Durante la semana de las siete de roca desnuda de .


Processing epoch 67: 100%|██████████| 103/103 [00:27<00:00,  3.70it/s, loss=1.545]


--------------------------------------------------------------------------------
SOURCE: "What about master's live babirusa?"
TARGET: -¿Y el babirusa vivo del señor?
PREDICTED: -¿ Y el señor de los " No es allá ?
--------------------------------------------------------------------------------
SOURCE: I may then tell the cardinal, with respect to this little woman--"
TARGET: Puedo, por tanto, decir al cardenal que, respecto a esa mujer...
PREDICTED: Sé que el cardenal le a su mujer ...


Processing epoch 68: 100%|██████████| 103/103 [00:27<00:00,  3.70it/s, loss=1.554]


--------------------------------------------------------------------------------
SOURCE: "Nothing."
TARGET: –¡Sí!...
PREDICTED: –– Nada .
--------------------------------------------------------------------------------
SOURCE: Then she broke the silence to cry out,
TARGET: Entonces ella rompió el silencio para exclamar:
PREDICTED: Luego , dio el silencio de cabeza un instante , cruzó la mente .


Processing epoch 69: 100%|██████████| 103/103 [00:27<00:00,  3.70it/s, loss=1.552]


--------------------------------------------------------------------------------
SOURCE: I am staying there while I conduct the inquiry."
TARGET: Clair. Me estoy alojando allí mientras llevo a cabo la investigación.
PREDICTED: aquí por buen camino ; supongo que la investigación .
--------------------------------------------------------------------------------
SOURCE: "When will we reach Vanikoro?"
TARGET: -¿Cuándo estaremos en Vanikoro?
PREDICTED: -¿ Cuándo nos vamos a Vanikoro ?


Processing epoch 70: 100%|██████████| 103/103 [00:27<00:00,  3.68it/s, loss=1.538]


--------------------------------------------------------------------------------
SOURCE: "I have won five pistoles of Aramis."
TARGET: Le he ganado cinco pistolas a Aramis.
PREDICTED: Tengo cinco pistolas .
--------------------------------------------------------------------------------
SOURCE: Signed, Richard Mason.'"
TARGET: Firmado: Richard Mason.»
PREDICTED: : ¡ Cómo te llamas !


Processing epoch 71: 100%|██████████| 103/103 [00:28<00:00,  3.68it/s, loss=1.555]


--------------------------------------------------------------------------------
SOURCE: That happened which M. de Treville had foreseen.
TARGET: Lo que había previsto el señor de Tréville ocurrió.
PREDICTED: De maravilla que la misma del señor Tréville había contado encima .
--------------------------------------------------------------------------------
SOURCE: 'I never maintained it,' thought Levin...
TARGET: «Jamás lo he asegurado», pensó Levin.
PREDICTED: «¡ No he pensado nada ! – comentó Levin –.


Processing epoch 72: 100%|██████████| 103/103 [00:27<00:00,  3.70it/s, loss=1.530]


--------------------------------------------------------------------------------
SOURCE: "The Miss Reeds could not play as well!" said she exultingly.
TARGET: -¡Las señoritas no tocan tan bien! -dijo con entusiasmo-.
PREDICTED: -¡ La señorita Stoner permitía desplazarse como no ! - dijo con voz baja .
--------------------------------------------------------------------------------
SOURCE: "Our case is not complete.
TARGET: -Nuestro caso no está terminado.
PREDICTED: El caso no es absolutamente nada .


Processing epoch 73: 100%|██████████| 103/103 [00:27<00:00,  3.70it/s, loss=1.535]


--------------------------------------------------------------------------------
SOURCE: No wind, and not a cloud in the sky.
TARGET: No hay viento, ni se ve una nube en el cielo.
PREDICTED: El viento y media absoluta ventana estaba vacía y los colonos .
--------------------------------------------------------------------------------
SOURCE: You should come and join us here.'
TARGET: Porque en este caso podría sentarse con nosotros.
PREDICTED: Usted volverá a tu parte .


Processing epoch 74: 100%|██████████| 103/103 [00:27<00:00,  3.74it/s, loss=1.541]


--------------------------------------------------------------------------------
SOURCE: Gregor had almost entirely stopped eating.
TARGET: Gregorio ya no comía casi nada.
PREDICTED: Gregorio todos estaban únicamente para tomar siquiera .
--------------------------------------------------------------------------------
SOURCE: 'Well, shall we go?'
TARGET: ¿Vamos?
PREDICTED: –¿ Qué ? ¿ Vamos ?


Processing epoch 75: 100%|██████████| 103/103 [00:27<00:00,  3.70it/s, loss=1.531]


--------------------------------------------------------------------------------
SOURCE: 'Yes, yes, yes...'
TARGET: –Sí, sí, sí.
PREDICTED: – Sí , sí ...
--------------------------------------------------------------------------------
SOURCE: "But suppose it is an extinct volcano?"
TARGET: —¿Y si se trata de un cráter apagado?
PREDICTED: –¿ Y eso está cerrado ?


Processing epoch 76: 100%|██████████| 103/103 [00:27<00:00,  3.69it/s, loss=1.519]


--------------------------------------------------------------------------------
SOURCE: 'I shall be very glad,' replied Koznyshev, still smiling.
TARGET: –Conforme. Me gustará mucho –contestó Sergio Ivanovich, siempre sonriente.
PREDICTED: – Lo haré muy simpática – dijo Sergio , sonriendo .
--------------------------------------------------------------------------------
SOURCE: This current swept along with it a whole host of moving creatures.
TARGET: La corriente arrastraba con ella a todo un mundo de seres vivos.
PREDICTED: Esta corriente , se por una persona una verdadera debilidad de nuestro viaje .


Processing epoch 77: 100%|██████████| 103/103 [00:27<00:00,  3.70it/s, loss=1.527]


--------------------------------------------------------------------------------
SOURCE: "Have I anything to pay?" demanded d’Artagnan.
TARGET: ¿Debo algo? preguntó D'Artagnan.
PREDICTED: ¿ Es algo que hacer ? preguntó D ' Artagnan .
--------------------------------------------------------------------------------
SOURCE: "Certainly, my best.
TARGET: -Sí, querida.
PREDICTED: - Desde luego , sería lo mejor .


Processing epoch 78: 100%|██████████| 103/103 [00:27<00:00,  3.70it/s, loss=1.510]


--------------------------------------------------------------------------------
SOURCE: "Oh, yes, yes; you are right.
TARGET: ¡Oh, sí, sí, tenéis razón!
PREDICTED: ¡ Oh , sí ! ¿ Conque tenéis razón ?
--------------------------------------------------------------------------------
SOURCE: One is a she-bear.
TARGET: ¡Vámonos ahora mismo a Tver!
PREDICTED: Hay una .


Processing epoch 79: 100%|██████████| 103/103 [00:27<00:00,  3.71it/s, loss=1.504]


--------------------------------------------------------------------------------
SOURCE: But what kind of man was he?
TARGET: Pero, ¿qué clase de persona era?
PREDICTED: Pero ¿ qué hombre era él ?
--------------------------------------------------------------------------------
SOURCE: "No, I am descending."
TARGET: ––En efecto, voy descendiendo.
PREDICTED: - No , me quedo .


Processing epoch 80: 100%|██████████| 103/103 [00:27<00:00,  3.69it/s, loss=1.504]


--------------------------------------------------------------------------------
SOURCE: "Certainly, my best.
TARGET: -Sí, querida.
PREDICTED: - Desde luego , es lo que necesito .
--------------------------------------------------------------------------------
SOURCE: The Nautilus kept descending.
TARGET: El Nautilus continuó descendiendo.
PREDICTED: El Nautilus parecía un violento .


Processing epoch 81: 100%|██████████| 103/103 [00:28<00:00,  3.66it/s, loss=1.500]


--------------------------------------------------------------------------------
SOURCE: Her arms rose and her hands dropped on his shoulders.
TARGET: Sus manos se levantaron y se posaron en los hombros de Levin.
PREDICTED: Ana se levantó y cogió la mano sobre sus hombros .
--------------------------------------------------------------------------------
SOURCE: "Why so?" said Sancho.
TARGET: -Pues, ¿por qué? -dijo Sancho.
PREDICTED: -¿ Por qué ? - dijo Sancho -.


Processing epoch 82: 100%|██████████| 103/103 [00:27<00:00,  3.69it/s, loss=1.499]


--------------------------------------------------------------------------------
SOURCE: I am staying there while I conduct the inquiry."
TARGET: Clair. Me estoy alojando allí mientras llevo a cabo la investigación.
PREDICTED: Estoy aquí para mi propia roca . Yo estoy vivo en los medios .
--------------------------------------------------------------------------------
SOURCE: "With a wood round it?"
TARGET: -¿Y un bosque alrededor?
PREDICTED: -¿ Con un bosque ?


Processing epoch 83: 100%|██████████| 103/103 [00:27<00:00,  3.68it/s, loss=1.511]


--------------------------------------------------------------------------------
SOURCE: He said he would come back.'
TARGET: Ha dicho que volvería.
PREDICTED: Él dijo que había llegado .
--------------------------------------------------------------------------------
SOURCE: "Why don't you consult my art?"
TARGET: -¿Cómo no quería consultar mi ciencia?
PREDICTED: -¿ Cómo usted ?


Processing epoch 84: 100%|██████████| 103/103 [00:27<00:00,  3.77it/s, loss=1.488]


--------------------------------------------------------------------------------
SOURCE: "That is the exact situation.
TARGET: -Buenos días, Holmes -dijo el baronet-.
PREDICTED: Es la situación de la situación .
--------------------------------------------------------------------------------
SOURCE: "And does she go from Suez directly to Bombay?"
TARGET: ¿Y de Suez se marcha directamente a Bombay?
PREDICTED: ¿ Y está bien en Suez de este Bombay ?


Processing epoch 85: 100%|██████████| 103/103 [00:27<00:00,  3.79it/s, loss=1.500]


--------------------------------------------------------------------------------
SOURCE: "Lord!
TARGET: -¡Por Dios, Anne! -exclamó su hermana-.
PREDICTED: ¡ Ay , Dios !
--------------------------------------------------------------------------------
SOURCE: "The devil!" muttered Passepartout.
TARGET: ¡Diantre! exclamó Picaporte.
PREDICTED: ¡ Diablos ! exclamó Picaporte .


Processing epoch 86: 100%|██████████| 103/103 [00:27<00:00,  3.81it/s, loss=1.484]


--------------------------------------------------------------------------------
SOURCE: Don't you see now whence these words have been taken?"
TARGET: ¿Ve usted ahora de dónde se han tomado esas palabras?
PREDICTED: ¿ No va usted francamente de esos palabras ?
--------------------------------------------------------------------------------
SOURCE: "Yes, perfectly well--intimately even."
TARGET: Sí, perfectamente, mucho incluso.
PREDICTED: – Sí , en efecto : el está íntimamente ligado .


Processing epoch 87: 100%|██████████| 103/103 [00:27<00:00,  3.81it/s, loss=1.487]


--------------------------------------------------------------------------------
SOURCE: To this no reply was possible.
TARGET: No había respuesta posible a esa pregunta.
PREDICTED: A ello no fue posible .
--------------------------------------------------------------------------------
SOURCE: You consider Vronsky an aristocrat. I don't.
TARGET: Tú consideras que Vronsky es un aristócrata y yo no.
PREDICTED: Usted ha hecho Vronsky junto a Vronsky .


Processing epoch 88: 100%|██████████| 103/103 [00:27<00:00,  3.79it/s, loss=1.478]


--------------------------------------------------------------------------------
SOURCE: They fear them, therefore they must know them."
TARGET: Los temen, luego los conocen.
PREDICTED: No están más pues , como pueden saber .
--------------------------------------------------------------------------------
SOURCE: Does he wish it?'
TARGET: ¿Lo desea él?
PREDICTED: ¿ Estará usted satisfecho ?


Processing epoch 89: 100%|██████████| 103/103 [00:27<00:00,  3.78it/s, loss=1.495]


--------------------------------------------------------------------------------
SOURCE: My undaunted uncle calmly shook his head.
TARGET: Mi tío sacudió la cabeza con calma.
PREDICTED: Mi tío golpeó la cabeza con la cabeza .
--------------------------------------------------------------------------------
SOURCE: Where?
TARGET: ''¿Adónde?
PREDICTED: Pero ¿ dónde ?


Processing epoch 90: 100%|██████████| 103/103 [00:27<00:00,  3.79it/s, loss=1.479]


--------------------------------------------------------------------------------
SOURCE: How can I ask them away from her?"
TARGET: ¿Cómo puedo pedirles que la dejen?
PREDICTED: ¿ Cómo podré retener de ella ?
--------------------------------------------------------------------------------
SOURCE: Gräuben was far away; and I never hoped to see her again.
TARGET: Y ni aun esperanzas tenía de volver a verla jamás.
PREDICTED: Graüben .


Processing epoch 91: 100%|██████████| 103/103 [00:26<00:00,  3.82it/s, loss=1.478]


--------------------------------------------------------------------------------
SOURCE: "The Coroner: What do you mean?
TARGET: »El juez: ¿Qué quiere decir con eso?
PREDICTED: » El juez : ¿ Qué queréis decir ?
--------------------------------------------------------------------------------
SOURCE: 'Amen!' from the invisible choir, again floated through the air.
TARGET: «¡Amén!» llenaron de nuevo el aire las voces del coro.
PREDICTED: – Amén a Amén de paseo por los Scherbazky , se dirigió al aire .


Processing epoch 92: 100%|██████████| 103/103 [00:27<00:00,  3.80it/s, loss=1.486]


--------------------------------------------------------------------------------
SOURCE: Karenin entered the boudoir.
TARGET: Alexey Alejandrovich entró en el gabinete de Ana.
PREDICTED: Alexey Alejandrovich entró con los últimos movimientos .
--------------------------------------------------------------------------------
SOURCE: CHAPTER 32
TARGET: CAPITULO XXXII
PREDICTED: Capítulo XXXII


Processing epoch 93: 100%|██████████| 103/103 [00:27<00:00,  3.77it/s, loss=1.469]


--------------------------------------------------------------------------------
SOURCE: CHAPTER XXVIII. THE RESCUE IN THE WHISPERING GALLERY
TARGET: Capítulo XXVIII
PREDICTED: Capítulo XXVIII
--------------------------------------------------------------------------------
SOURCE: 'Very ill?
TARGET: ¿Muy enferma?
PREDICTED: – Muy mal .


Processing epoch 94: 100%|██████████| 103/103 [00:27<00:00,  3.78it/s, loss=1.473]


--------------------------------------------------------------------------------
SOURCE: "That is well.
TARGET: ––Eso está bien.
PREDICTED: –– Estupendo .
--------------------------------------------------------------------------------
SOURCE: Speak out, man, and don't stand staring!"
TARGET: ¡Hable, caramba, y no se me quede mirando!
PREDICTED: ¡ Hablad , hombre , y no con voz alta !


Processing epoch 95: 100%|██████████| 103/103 [00:27<00:00,  3.77it/s, loss=1.479]


--------------------------------------------------------------------------------
SOURCE: "Confound it!" exclaimed the sailor.
TARGET: –¡Maldición! –exclamó el marino, sin contenerse.
PREDICTED: –¡ Miss Eyre ! – exclamó el marino .
--------------------------------------------------------------------------------
SOURCE: "I am, then, your prisoner?"
TARGET: ¿Soy, pues, vuestra prisionera?
PREDICTED: Entonces , ¿ os haré vuestra prisionera ?


Processing epoch 96: 100%|██████████| 103/103 [00:27<00:00,  3.79it/s, loss=1.477]


--------------------------------------------------------------------------------
SOURCE: Alexander smiled gaily.
TARGET: Alejandro Vronsky, que lo sabía, sonrió con jovialidad.
PREDICTED: Alejandro .
--------------------------------------------------------------------------------
SOURCE: He was a nobleman, a man equal to Buckingham in every respect.
TARGET: Era un gran señor, era un hombre en todo el igual de Buckingham.
PREDICTED: El noble lo comprenderá todo a su comandante Ketty .


Processing epoch 97: 100%|██████████| 103/103 [00:27<00:00,  3.78it/s, loss=1.475]


--------------------------------------------------------------------------------
SOURCE: Impatient of delay, with reckless pace
TARGET: Salió el deseo de compás, y el paso
PREDICTED: Cruel Vireno , con gran estruendo , , se dirigieron a sus pies .
--------------------------------------------------------------------------------
SOURCE: CHAPTER VII
TARGET: VII
PREDICTED: VII


Processing epoch 98: 100%|██████████| 103/103 [00:26<00:00,  3.83it/s, loss=1.470]


--------------------------------------------------------------------------------
SOURCE: "What do you mean?"
TARGET: -¿Por qué?
PREDICTED: -¿ Qué quieres decir ?
--------------------------------------------------------------------------------
SOURCE: Being fossils, we looked upon all those things as mere jokes.
TARGET: En nuestra calidad de fósiles, nos burlábamos de estas maravillas inútiles.
PREDICTED: No pudiendo nosotros , los miró de una situación , sobre las regiones .


Processing epoch 99: 100%|██████████| 103/103 [00:27<00:00,  3.79it/s, loss=1.469]


--------------------------------------------------------------------------------
SOURCE: 'I was only going to say that...'
TARGET: –Quisiera decirte...
PREDICTED: – Sólo quería decir ..
--------------------------------------------------------------------------------
SOURCE: "What, when we have nothing to do but keep going down!"
TARGET: —¡Cómo fatigoso, cuando siempre caminamos cuesta abajo!
PREDICTED: -¡ Qué , cuándo no vamos a conseguir nada !


# Using a pretrained model

In [10]:
# Marian machine-translation model class and its matching tokenizer class.
from transformers import MarianMTModel, MarianTokenizer

# Pretrained English -> Spanish checkpoint; presumably used here as a
# reference point for the from-scratch model whose training log is above.
model_name = "Helsinki-NLP/opus-mt-en-es"
# Tokenizer and model are loaded from the same checkpoint name so that the
# token ids produced by one are the ids expected by the other.
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)



In [11]:
# English sentences to translate: the SOURCE lines of the validation samples
# printed for the last two training epochs (98 and 99).
src_text = [
    'What, when we have nothing to do but keep going down!',
    'I was only going to say that...',
    'Being fossils, we looked upon all those things as mere jokes.',
    'What do you mean?',
]
# Encode all sentences as one padded batch of PyTorch tensors.
inputs = tokenizer(src_text, padding=True, return_tensors='pt')
# Generate the translated token-id sequences with the pretrained model.
translated = model.generate(**inputs)
# Decode every generated sequence back to text, dropping special tokens.
tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
print(tgt_text)

['¿Qué, cuando no tenemos nada que hacer sino seguir bajando!', 'Sólo iba a decir que...', 'Siendo fósiles, veíamos todas esas cosas como meras bromas.', '¿Qué quieres decir?']
