# Etapa 1: Importação das bibliotecas

In [None]:
import numpy as np
import math
import re
import time
import zipfile
import random
from google.colab import drive
import tensorflow as tf
from tensorflow.keras import layers
import tensorflow_datasets as tfds

# Etapa 2: Pré-processamento dos dados

## Carregamento da base de dados

- Bases de dados: https://www.statmt.org/europarl/

In [None]:
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
import os, sys, tarfile

# Europarl pt-en archive stored on the mounted Google Drive.
path = '/content/drive/MyDrive/Base de Dados/pt-en.tgz'
# The context manager closes the archive even when extraction fails part-way,
# so the file handle is never leaked. The name `zip_object` is kept for any
# later cell that may refer to it, although the file is a tar archive.
with tarfile.open(path) as zip_object:
  zip_object.extractall('./pt-en/')

In [None]:
# Load each side of the parallel corpus into memory as one UTF-8 string.
with open('/content/pt-en/europarl-v7.pt-en.en', encoding='utf-8') as en_file:
  europarl_en = en_file.read()

with open('/content/pt-en/europarl-v7.pt-en.pt', encoding='utf-8') as pt_file:
  europarl_pt = pt_file.read()

In [None]:
europarl_en[0:100]

'Resumption of the session\nI declare resumed the session of the European Parliament adjourned on Frid'

In [None]:
en = europarl_en.split('\n')

In [None]:
len(en)

1960408

In [None]:
en[:5]

['Resumption of the session',
 'I declare resumed the session of the European Parliament adjourned on Friday 17 December 1999, and I would like once again to wish you a happy new year in the hope that you enjoyed a pleasant festive period.',
 "Although, as you will have seen, the dreaded 'millennium bug' failed to materialise, still the people in a number of countries suffered a series of natural disasters that truly were dreadful.",
 'You have requested a debate on this subject in the course of the next few days, during this part-session.',
 "In the meantime, I should like to observe a minute' s silence, as a number of Members have requested, on behalf of all the victims concerned, particularly those of the terrible storms, in the various countries of the European Union."]

In [None]:
pt = europarl_pt.split('\n')

In [None]:
len(pt)

1960408

In [None]:
# Print five randomly chosen aligned (English, Portuguese) lines as a sanity
# check that both files are line-aligned.
for _ in range(5):
  pair_idx = random.randrange(len(en))
  print('-----', en[pair_idx], pt[pair_idx], sep='\n')

-----
Service quality and innovation are likely to be improved.
A qualidade do serviço e a inovação sofrerão provavelmente melhorias.
-----
This again is, in my view, unique. Usually our three institutions go their own ways and then try to coordinate a little at the end.
Também isto é, quanto a mim, de algum modo inovador pois, na maioria das vezes, as nossas instituições seguem o seu próprio caminho, procurando no fim introduzir ainda alguma coordenação.
-----
Following this, the Italian authorities, primarily the government, with the cooperation of all local bodies concerned, must move quickly to calculate the cost of the direct damage sustained, in order to meet the deadline of 15 June 2009 for submitting an application for funds within the framework of the European Solidarity Fund.
Na sequência disto, as autoridades italianas, em primeiro lugar o governo, com a cooperação de todos os organismos locais, deverão proceder com celeridade para calcular o custo dos danos directos sofrido

## Limpeza dos dados

In [None]:
# Clean the English text in three regex passes, then split it into lines:
#   1. tag every period that is immediately followed by a letter or digit
#      by appending "$$$" to it;
#   2. delete each "$$$" tag together with the single character before it
#      (the tagged period);
#   3. collapse runs of spaces into one space.
corpus_en = europarl_en
for pattern, replacement in ((r"\.(?=[0-9]|[a-z]|[A-Z])", ".$$$"),
                             (r".\$\$\$", ''),
                             (r" +", " ")):
  corpus_en = re.sub(pattern, replacement, corpus_en)
corpus_en = corpus_en.split('\n')

In [None]:
# Clean the Portuguese text in three regex passes, then split it into lines:
#   1. tag every period that is immediately followed by a letter or digit
#      by appending "$$$" to it;
#   2. delete each "$$$" tag together with the single character before it
#      (the tagged period);
#   3. collapse runs of spaces into one space.
corpus_pt = europarl_pt
for pattern, replacement in ((r"\.(?=[0-9]|[a-z]|[A-Z])", ".$$$"),
                             (r".\$\$\$", ''),
                             (r" +", " ")):
  corpus_pt = re.sub(pattern, replacement, corpus_pt)
corpus_pt = corpus_pt.split('\n')

In [None]:
len(corpus_en)

1960408

In [None]:
len(corpus_pt)

1960408


## Tokenização

In [None]:
2**13

8192

In [None]:
# Build the English subword tokenizer from the cleaned corpus.
# If this call fails because of the installed tensorflow_datasets version, use
# the commented-out `tfds.features.text` variant further down instead.
# NOTE(review): this call requests 2**16 subwords, while the Portuguese
# tokenizer and the commented-out alternative both use 2**13 — confirm which
# vocabulary size is intended (it must match any checkpoint being restored).
tokenizer_en = tfds.deprecated.text.SubwordTextEncoder.build_from_corpus(corpus_en, target_vocab_size=2**16)

# tokenizer_en = tfds.features.text.SubwordTextEncoder.build_from_corpus(corpus_en, target_vocab_size=2**13)

In [None]:
tokenizer_en.vocab_size

66117

In [None]:
tokenizer_pt = tfds.deprecated.text.SubwordTextEncoder.build_from_corpus(corpus_pt, target_vocab_size=2**13)

In [None]:
tokenizer_pt.vocab_size

8116

In [None]:
vocab_size_en = tokenizer_en.vocab_size + 2
vocab_size_pt = tokenizer_pt.vocab_size + 2

In [None]:
inputs = [[vocab_size_en - 2] + tokenizer_en.encode(sentence) + [vocab_size_en - 1] for sentence in corpus_en]

In [None]:
for _ in range(5):
  print(inputs[random.randint(0, len(inputs) - 1)])

[66117, 36, 17, 8, 1302, 4, 426, 15, 571, 8, 107, 2176, 119, 231, 16, 6952, 82, 5, 601, 95, 6, 253, 16, 5625, 82, 4, 327, 154, 21, 5722, 2, 6315, 2, 3512, 5, 1716, 75, 7, 61, 46, 172, 1, 22, 106, 65900, 29, 4918, 201, 2656, 1, 550, 3, 3671, 53, 2597, 65907, 66118]
[66117, 24, 2056, 1123, 9, 122, 65900, 29, 2179, 5, 147, 4408, 2011, 1007, 65907, 66118]
[66117, 6, 543, 491, 2855, 94, 36, 17, 993, 944, 8, 1253, 494, 1, 520, 397, 7, 39, 3, 82, 46, 50832, 65907, 66118]
[66117, 24, 4109, 251, 5621, 4883, 5836, 1, 484, 4, 90, 998, 439, 6, 632, 2790, 23875, 2, 20, 190, 20, 443, 278, 10, 1, 5857, 88, 29, 4249, 5, 1430, 6, 545, 92, 355, 5, 1196, 5, 6, 303, 2790, 1992, 545, 5, 16498, 65907, 66118]
[66117, 45, 34, 13, 30, 187, 7, 9, 1011, 31, 1, 4022, 453, 65907, 66118]


In [None]:
outputs = [[vocab_size_pt - 2] + tokenizer_pt.encode(sentence) + [vocab_size_pt - 1] for sentence in corpus_pt]

In [None]:
for _ in range(5):
  print(outputs[random.randint(0, len(outputs) - 1)])

[8116, 1336, 251, 2831, 46, 242, 7906, 8117]
[8116, 7734, 318, 4, 487, 31, 1686, 4718, 24, 575, 8, 188, 7906, 8117]
[8116, 97, 5814, 1, 723, 626, 7905, 239, 44, 1, 11, 1405, 1, 9, 6310, 19, 140, 4867, 3139, 7906, 8117]
[8116, 1130, 6647, 135, 6, 3, 1387, 802, 1362, 1, 2633, 240, 304, 3133, 7972, 226, 7906, 8117]
[8116, 2877, 3875, 1, 7251, 7905, 239, 6, 144, 1084, 1, 288, 642, 28, 775, 7906, 8117]


## Remoção de sentenças muito longas

In [None]:
max_length = 15
idx_to_remove = [count for count, sent in enumerate(inputs) if len(sent) > max_length]

In [None]:
len(idx_to_remove)

1620999

In [None]:
# Drop every flagged sentence pair from both lists, keeping them aligned.
# Each list is filtered in a single linear pass; deleting the entries one by
# one with `del` shifts the tail of the list on every call, which becomes
# quadratic when most of the corpus is being removed.
_drop = set(idx_to_remove)
inputs = [sent for idx, sent in enumerate(inputs) if idx not in _drop]
outputs = [sent for idx, sent in enumerate(outputs) if idx not in _drop]

In [None]:
idx_to_remove = [count for count, sent in enumerate(outputs) if len(sent) > max_length]

In [None]:
len(idx_to_remove)

120018

In [None]:
# Drop every flagged sentence pair from both lists, keeping them aligned.
# Each list is filtered in a single linear pass instead of deleting entries
# one by one, because every `del` shifts the tail of the list.
_drop = set(idx_to_remove)
inputs = [sent for idx, sent in enumerate(inputs) if idx not in _drop]
outputs = [sent for idx, sent in enumerate(outputs) if idx not in _drop]

In [None]:
len(inputs)

219391

In [None]:
len(outputs)

219391

## Padding e batches

In [None]:
inputs = tf.keras.preprocessing.sequence.pad_sequences(inputs, value=0, padding = 'post', maxlen=max_length)
outputs = tf.keras.preprocessing.sequence.pad_sequences(outputs, value=0, padding = 'post', maxlen=max_length)

In [None]:
for _ in range(5):
  print(outputs[random.randint(0, len(outputs) - 1)])

[8116 1800   18 2846    2   13  349 3960   80 7906 8117    0    0    0
    0]
[8116  296   18    3   52  743  446   11 2995   29   13 1052  454 7906
 8117]
[8116 1120 4409   57 2002 3365 7906 8117    0    0    0    0    0    0
    0]
[8116 3192 7892    3  314    8 7034 7608 7923 8117    0    0    0    0
    0]
[8116 7900 2329 7901 8117    0    0    0    0    0    0    0    0    0
    0]


from_tensor_slices: https://www.geeksforgeeks.org/tensorflow-tf-data-dataset-from_tensor_slices/

cache e prefetch: https://www.tensorflow.org/guide/data_performance

In [None]:
batch_size = 64
buffer_size = 20000

# Input pipeline: slice the padded arrays into (input, target) pairs, cache
# them, shuffle within a `buffer_size` window, batch, and prefetch so data
# preparation can overlap with training.
dataset = (
    tf.data.Dataset.from_tensor_slices((inputs, outputs))
    .cache()
    .shuffle(buffer_size)
    .batch(batch_size)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

# Etapa 3: Construção do modelo

## Embedding

Positional encoding:

$PE_{(pos,2i)} = \sin\left(pos/10000^{2i/d_{model}}\right)$

$PE_{(pos,2i+1)} = \cos\left(pos/10000^{2i/d_{model}}\right)$

- The positional encodings have the same dimension $d_{model}$
as the embeddings, so that the two can be summed

In [None]:
class PositionalEncoding(layers.Layer):
    """Adds the fixed sinusoidal position signal to a batch of embeddings.

    The table is rebuilt with NumPy on every call from the static shape of the
    input, so the last two input dimensions must be statically known.
    """

    def __init__(self):
        super().__init__()

    def get_angles(self, pos, i, d_model):
        """Return ``pos / 10000 ** (2 * (i // 2) / d_model)``.

        ``pos`` and ``i`` are broadcast against each other; ``call`` passes a
        (seq_length, 1) column of positions and a (1, d_model) row of feature
        indices, which yields a (seq_length, d_model) array.
        """
        exponent = (2 * (i // 2)) / np.float32(d_model)
        rates = 1 / np.power(10000., exponent)
        return pos * rates

    def call(self, inputs):
        static_shape = inputs.shape.as_list()
        seq_length, d_model = static_shape[-2], static_shape[-1]

        positions = np.arange(seq_length)[:, np.newaxis]
        feature_idx = np.arange(d_model)[np.newaxis, :]
        encoding = self.get_angles(positions, feature_idx, d_model)

        # Even feature columns carry the sine, odd columns the cosine.
        encoding[:, 0::2] = np.sin(encoding[:, 0::2])
        encoding[:, 1::2] = np.cos(encoding[:, 1::2])

        # The new leading axis lets the table broadcast over the batch.
        return inputs + tf.cast(encoding[np.newaxis, ...], tf.float32)

## Mecanismo de atenção

### Cálculo da atenção

$Attention(Q, K, V ) = \text{softmax}\left(\dfrac{QK^T}{\sqrt{d_k}}\right)V $

In [None]:
def scaled_dot_product_attention(queries, keys, values, mask):
  """Compute softmax(Q K^T / sqrt(d_k)) V.

  Args:
    queries, keys, values: tensors whose last two axes are multiplied as
      matrices; d_k is taken from the last axis of `keys`.
    mask: optional tensor broadcastable to the score matrix, with 1 at the
      positions that must be ignored; pass None to attend everywhere.

  Returns:
    The attention-weighted combination of `values`.
  """
  depth = tf.cast(tf.shape(keys)[-1], tf.float32)
  scores = tf.matmul(queries, keys, transpose_b=True) / tf.math.sqrt(depth)

  if mask is not None:
    # Masked positions receive -1e9 (minus one billion), so the softmax
    # assigns them a weight of practically zero.
    scores = scores + (mask * -1e9)

  weights = tf.nn.softmax(scores, axis=-1)
  return tf.matmul(weights, values)

### Multi-head attention sublayer

In [None]:
class MultiHeadAttention(layers.Layer):
    """Multi-head attention: project, split into heads, attend, merge, project.

    Args:
      nb_proj: number of heads; must divide the model dimension, which is read
        from the last axis of the shape Keras passes to `build`.
    """

    def __init__(self, nb_proj):
        super().__init__()
        self.nb_proj = nb_proj

    def build(self, input_shape):
        self.d_model = input_shape[-1]
        assert self.d_model % self.nb_proj == 0

        # Width of a single head.
        self.d_proj = self.d_model // self.nb_proj

        # Input projections for queries, keys and values.
        self.query_lin = layers.Dense(units=self.d_model)
        self.key_lin = layers.Dense(units=self.d_model)
        self.value_lin = layers.Dense(units=self.d_model)

        # Output projection applied to the concatenated heads.
        self.final_lin = layers.Dense(units=self.d_model)

    def split_proj(self, inputs, batch_size):
        """Reshape (batch, seq, d_model) into (batch, nb_proj, seq, d_proj)."""
        per_head = tf.reshape(inputs,
                              shape=(batch_size, -1, self.nb_proj, self.d_proj))
        return tf.transpose(per_head, perm=[0, 2, 1, 3])

    def call(self, queries, keys, values, mask):
        batch_size = tf.shape(queries)[0]

        projected_q = self.query_lin(queries)
        projected_k = self.key_lin(keys)
        projected_v = self.value_lin(values)

        heads = scaled_dot_product_attention(
            self.split_proj(projected_q, batch_size),
            self.split_proj(projected_k, batch_size),
            self.split_proj(projected_v, batch_size),
            mask)

        # Move the head axis back next to the feature axis and merge the two.
        heads = tf.transpose(heads, perm=[0, 2, 1, 3])
        merged = tf.reshape(heads, shape=(batch_size, -1, self.d_model))

        return self.final_lin(merged)

## Encoder

In [None]:
class EncoderLayer(layers.Layer):
    """One encoder block: self-attention followed by a feed-forward network.

    Each sub-layer output goes through dropout, is added to the sub-layer
    input (residual connection) and is then layer-normalised.

    Args:
      FFN_units: width of the hidden feed-forward layer.
      nb_proj: number of attention heads.
      dropout_rate: rate used by both dropout layers.
    """

    def __init__(self, FFN_units, nb_proj, dropout_rate):
        super(EncoderLayer, self).__init__()
        self.FFN_units = FFN_units
        self.nb_proj = nb_proj
        self.dropout_rate = dropout_rate

    def build(self, input_shape):
        self.d_model = input_shape[-1]

        self.multi_head_attention = MultiHeadAttention(self.nb_proj)
        self.dropout_1 = layers.Dropout(rate=self.dropout_rate)
        self.norm_1 = layers.LayerNormalization(epsilon=1e-6)

        self.dense_1 = layers.Dense(units=self.FFN_units, activation='relu')
        # The output projection is linear: FFN(x) = max(0, x W1 + b1) W2 + b2.
        # A ReLU here would force the feed-forward branch to be non-negative
        # before it is added back to the residual stream.
        # NOTE: the layer's variables are unchanged, so older checkpoints still
        # load, but weights trained with the ReLU should be retrained.
        self.dense_2 = layers.Dense(units=self.d_model)
        self.dropout_2 = layers.Dropout(rate=self.dropout_rate)

        self.norm_2 = layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs, mask, training):
        """Run the block on `inputs`; `mask` marks positions to ignore."""
        # Self-attention sub-layer: queries, keys and values are all `inputs`.
        attention = self.multi_head_attention(inputs, inputs, inputs, mask)
        attention = self.dropout_1(attention, training=training)
        attention = self.norm_1(attention + inputs)

        # Position-wise feed-forward sub-layer.
        outputs = self.dense_1(attention)
        outputs = self.dense_2(outputs)
        outputs = self.dropout_2(outputs, training=training)
        outputs = self.norm_2(outputs + attention)

        return outputs

In [None]:
class Encoder(layers.Layer):
    """Token embedding, positional encoding and a stack of encoder layers.

    Args:
      nb_layers: number of EncoderLayer blocks.
      FFN_units, nb_proj, dropout_rate: forwarded to every EncoderLayer;
        `dropout_rate` is also used on the embedded input.
      vocab_size: size of the embedding table.
      d_model: embedding width.
      name: Keras layer name.
    """

    def __init__(self,
                 nb_layers,
                 FFN_units,
                 nb_proj,
                 dropout_rate,
                 vocab_size,
                 d_model,
                 name="encoder"):
        super().__init__(name=name)
        self.nb_layers = nb_layers
        self.d_model = d_model

        self.embedding = layers.Embedding(vocab_size, d_model)
        self.pos_encoding = PositionalEncoding()
        self.dropout = layers.Dropout(rate=dropout_rate)
        self.enc_layers = [EncoderLayer(FFN_units, nb_proj, dropout_rate)
                           for _ in range(nb_layers)]

    def call(self, inputs, mask, training):
        # Embeddings are scaled by sqrt(d_model) before the position signal
        # is added.
        scale = tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        outputs = self.embedding(inputs) * scale
        outputs = self.pos_encoding(outputs)
        outputs = self.dropout(outputs, training=training)

        for enc_layer in self.enc_layers:
            outputs = enc_layer(outputs, mask, training)

        return outputs

## Decoder

In [None]:
class DecoderLayer(layers.Layer):
    """One decoder block: self-attention, encoder attention, feed-forward.

    Each sub-layer output goes through dropout, is added to the sub-layer
    input (residual connection) and is then layer-normalised.

    Args:
      FFN_units: width of the hidden feed-forward layer.
      nb_proj: number of attention heads.
      dropout_rate: rate used by the three dropout layers.
    """

    def __init__(self, FFN_units, nb_proj, dropout_rate):
        super(DecoderLayer, self).__init__()
        self.FFN_units = FFN_units
        self.nb_proj = nb_proj
        self.dropout_rate = dropout_rate

    def build(self, input_shape):
        self.d_model = input_shape[-1]

        # Self-attention over the decoder inputs.
        self.multi_head_attention_1 = MultiHeadAttention(self.nb_proj)
        self.dropout_1 = layers.Dropout(rate=self.dropout_rate)
        self.norm_1 = layers.LayerNormalization(epsilon=1e-6)

        # Attention over the encoder outputs.
        self.multi_head_attention_2 = MultiHeadAttention(self.nb_proj)
        self.dropout_2 = layers.Dropout(rate=self.dropout_rate)
        self.norm_2 = layers.LayerNormalization(epsilon=1e-6)

        self.dense_1 = layers.Dense(units = self.FFN_units, activation='relu')
        # The output projection is linear: FFN(x) = max(0, x W1 + b1) W2 + b2.
        # A ReLU here would force the feed-forward branch to be non-negative
        # before it is added back to the residual stream.
        # NOTE: the layer's variables are unchanged, so older checkpoints still
        # load, but weights trained with the ReLU should be retrained.
        self.dense_2 = layers.Dense(units = self.d_model)
        self.dropout_3 = layers.Dropout(rate=self.dropout_rate)
        self.norm_3 = layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs, enc_outputs, mask_1, mask_2, training):
        """Run the block.

        `mask_1` is applied to the self-attention over `inputs`; `mask_2` is
        applied to the attention whose keys and values are `enc_outputs`.
        """
        attention = self.multi_head_attention_1(inputs, inputs, inputs, mask_1)
        attention = self.dropout_1(attention, training=training)
        attention = self.norm_1(attention + inputs)

        # Queries come from the decoder, keys and values from the encoder.
        attention_2 = self.multi_head_attention_2(attention, enc_outputs, enc_outputs, mask_2)
        attention_2 = self.dropout_2(attention_2, training=training)
        attention_2 = self.norm_2(attention_2 + attention)

        outputs = self.dense_1(attention_2)
        outputs = self.dense_2(outputs)
        outputs = self.dropout_3(outputs, training=training)
        outputs = self.norm_3(outputs + attention_2)

        return outputs

In [None]:
class Decoder(layers.Layer):
    """Token embedding, positional encoding and a stack of decoder layers.

    Args:
      nb_layers: number of DecoderLayer blocks.
      FFN_units, nb_proj, dropout_rate: forwarded to every DecoderLayer;
        `dropout_rate` is also used on the embedded input.
      vocab_size: size of the embedding table.
      d_model: embedding width.
      name: Keras layer name.
    """

    def __init__(self,
                 nb_layers,
                 FFN_units,
                 nb_proj,
                 dropout_rate,
                 vocab_size,
                 d_model,
                 name="decoder"):
        super().__init__(name=name)
        self.d_model = d_model
        self.nb_layers = nb_layers

        self.embedding = layers.Embedding(vocab_size, d_model)
        self.pos_encoding = PositionalEncoding()
        self.dropout = layers.Dropout(rate=dropout_rate)

        self.dec_layers = [DecoderLayer(FFN_units, nb_proj, dropout_rate)
                           for _ in range(nb_layers)]

    def call(self, inputs, enc_outputs, mask_1, mask_2, training):
        # Embeddings are scaled by sqrt(d_model) before the position signal
        # is added.
        scale = tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        outputs = self.embedding(inputs) * scale
        outputs = self.pos_encoding(outputs)
        outputs = self.dropout(outputs, training=training)

        for dec_layer in self.dec_layers:
            outputs = dec_layer(outputs, enc_outputs, mask_1, mask_2, training)

        return outputs

## Transformer

- Matriz triangular: https://mundoeducacao.bol.uol.com.br/matematica/matriz-triangular.htm

In [None]:
class Transformer(tf.keras.Model):
    """Encoder-decoder Transformer that outputs logits over the target vocabulary.

    Args:
      vocab_size_enc: source vocabulary size (encoder embedding table).
      vocab_size_dec: target vocabulary size (decoder embedding table and
        width of the final projection).
      d_model, nb_layers, FFN_units, nb_proj, dropout_rate: forwarded to both
        the Encoder and the Decoder.
      name: Keras model name.
    """

    def __init__(self,
                 vocab_size_enc,
                 vocab_size_dec,
                 d_model,
                 nb_layers,
                 FFN_units,
                 nb_proj,
                 dropout_rate,
                 name="transformer"):
        super().__init__(name=name)

        self.encoder = Encoder(nb_layers, FFN_units, nb_proj, dropout_rate,
                               vocab_size_enc, d_model)
        self.decoder = Decoder(nb_layers, FFN_units, nb_proj, dropout_rate,
                               vocab_size_dec, d_model)
        self.last_linear = layers.Dense(units=vocab_size_dec, name='lin_output')

    def create_padding_mask(self, seq):
        """Return a float mask with 1.0 wherever `seq` holds the padding id 0.

        Maps (batch_size, seq_length) to (batch_size, 1, 1, seq_length) so the
        mask broadcasts over the head and query axes of the attention scores.
        """
        is_padding = tf.math.equal(seq, 0)
        return tf.cast(is_padding, tf.float32)[:, tf.newaxis, tf.newaxis, :]

    def create_look_ahead_mask(self, seq):
        """Return a (seq_length, seq_length) mask with 1.0 above the diagonal.

        Row i therefore hides every position after i.
        """
        seq_len = tf.shape(seq)[1]
        lower_triangle = tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0)
        return 1 - lower_triangle

    def call(self, enc_inputs, dec_inputs, training):
        # The source padding mask serves both the encoder self-attention and
        # the decoder's attention over the encoder outputs.
        enc_padding = self.create_padding_mask(enc_inputs)
        # Decoder self-attention hides target padding and future positions.
        dec_self_mask = tf.maximum(self.create_padding_mask(dec_inputs),
                                   self.create_look_ahead_mask(dec_inputs))

        enc_outputs = self.encoder(enc_inputs, enc_padding, training)
        dec_outputs = self.decoder(dec_inputs, enc_outputs,
                                   dec_self_mask, enc_padding, training)

        return self.last_linear(dec_outputs)

## Código somente para testes

In [None]:
# Test-only code: standalone copies of the Transformer mask helpers.

def create_padding_mask(seq):
  """Return a float mask with 1.0 wherever `seq` holds the padding id 0.

  Maps (batch_size, seq_length) to (batch_size, 1, 1, seq_length).
  """
  is_padding = tf.math.equal(seq, 0)
  return tf.cast(is_padding, tf.float32)[:, tf.newaxis, tf.newaxis, :]

def create_look_ahead_mask(seq):
  """Return a (seq_length, seq_length) mask with 1.0 above the diagonal."""
  seq_len = tf.shape(seq)[1]
  lower_triangle = tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0)
  return 1 - lower_triangle

In [None]:
# Define the sample sequence before its first use so this cell also runs on a
# fresh kernel; 0 is the padding id.
seq = tf.cast([[837, 836, 0, 273, 8, 0, 0, 0]], tf.int32)
create_padding_mask(seq)

NameError: ignored

In [None]:
# Define the sample sequence before its first use so this cell also runs on a
# fresh kernel; 0 is the padding id.
seq = tf.cast([[837, 836, 0, 273, 8, 0, 0, 0]], tf.int32)
create_look_ahead_mask(seq)

NameError: ignored

In [None]:
seq = tf.cast([[837, 836, 0, 273, 8, 0, 0, 0]], tf.int32)
tf.maximum(create_padding_mask(seq), create_look_ahead_mask(seq))

<tf.Tensor: shape=(1, 1, 8, 8), dtype=float32, numpy=
array([[[[0., 1., 1., 1., 1., 1., 1., 1.],
         [0., 0., 1., 1., 1., 1., 1., 1.],
         [0., 0., 1., 1., 1., 1., 1., 1.],
         [0., 0., 1., 0., 1., 1., 1., 1.],
         [0., 0., 1., 0., 0., 1., 1., 1.],
         [0., 0., 1., 0., 0., 1., 1., 1.],
         [0., 0., 1., 0., 0., 1., 1., 1.],
         [0., 0., 1., 0., 0., 1., 1., 1.]]]], dtype=float32)>

# Treinamento

In [None]:
# Reset Keras' global state before building the model.
tf.keras.backend.clear_session()

# Model hyperparameters. The trailing comment on each line is an alternative
# value — presumably the base-model setting from the original Transformer
# paper; TODO confirm.
d_model = 128 # 512
nb_layers = 4 # 6
ffn_units = 512 # 2048
nb_proj = 8 # 8
dropout_rate = 0.1 # 0.1

In [None]:
transformer = Transformer(vocab_size_enc=vocab_size_en,
                          vocab_size_dec=vocab_size_pt,
                          d_model=d_model,
                          nb_layers=nb_layers,
                          FFN_units=ffn_units,
                          nb_proj=nb_proj,
                          dropout_rate=dropout_rate)

In [None]:
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True,
                                                            reduction='none')

In [None]:
def loss_function(target, pred):
  """Sparse cross-entropy averaged over the real (non-padding) tokens.

  Args:
    target: integer token ids; id 0 marks padding and is excluded.
    pred: logits with one extra trailing axis over the vocabulary.

  Returns:
    Scalar loss: the summed per-token loss divided by the number of
    non-padding tokens, so the value does not shrink as padding grows.
  """
  token_loss = loss_object(target, pred)

  mask = tf.cast(tf.math.not_equal(target, 0), dtype=token_loss.dtype)
  token_loss *= mask

  # Clamp the token count to 1 so an all-padding batch cannot divide by zero.
  return tf.reduce_sum(token_loss) / tf.maximum(tf.reduce_sum(mask), 1.0)

In [None]:
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

In [None]:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
  """Warm-up then inverse-square-root learning-rate schedule.

  lr(step) = d_model ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)

  Args:
    d_model: model dimension used to scale the rate.
    warmup_steps: number of steps over which the rate grows linearly.
  """

  def __init__(self, d_model, warmup_steps=4000):
    super(CustomSchedule, self).__init__()

    self.d_model = tf.cast(d_model, tf.float32)
    self.warmup_steps = warmup_steps

  def __call__(self, step):
    # Optimizers may pass their integer iteration counter; rsqrt and the
    # multiplication below need a float, so cast first.
    step = tf.cast(step, tf.float32)
    arg1 = tf.math.rsqrt(step)
    arg2 = step * (self.warmup_steps ** -1.5)

    return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

In [None]:
learning_rate = CustomSchedule(d_model)

In [None]:
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

In [None]:
# Checkpoints for the model and the optimizer live on Google Drive; the
# manager keeps only the five most recent ones.
checkpoint_path = "/content/drive/My Drive/tradutor"
ckpt = tf.train.Checkpoint(transformer=transformer, optimizer=optimizer)
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)

# Resume from the newest checkpoint when one exists.
latest_ckpt = ckpt_manager.latest_checkpoint
if latest_ckpt:
  ckpt.restore(latest_ckpt)
  print('Latest checkpoint restored')

Latest checkpoint restored


In [None]:
# Custom training loop with teacher forcing: the decoder is fed the target
# without its last token and is trained to predict the target shifted left by
# one position. A checkpoint is saved at the end of every epoch.
epochs = 10
for epoch in range(epochs):
  print('Start of epoch {}'.format(epoch + 1))
  start = time.time()

  # Metrics are running means, so restart them for each epoch.
  train_loss.reset_states()
  train_accuracy.reset_states()

  for (batch, (enc_inputs, targets)) in enumerate(dataset):
    dec_inputs = targets[:, :-1]
    dec_outputs_real = targets[:, 1:]
    with tf.GradientTape() as tape:
      predictions = transformer(enc_inputs, dec_inputs, True)
      loss = loss_function(dec_outputs_real, predictions)

    gradients = tape.gradient(loss, transformer.trainable_variables)
    optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))

    train_loss(loss)
    # Score accuracy on real tokens only: padding positions (id 0) are left
    # out of the loss, so counting them would just drag the accuracy down.
    token_weights = tf.cast(tf.math.not_equal(dec_outputs_real, 0), tf.float32)
    train_accuracy(dec_outputs_real, predictions, sample_weight=token_weights)

    if batch % 50 == 0:
      print('Epoch {} Batch {} Loss {:.4f} Accuracy {:.4f}'.format(epoch+1, batch, train_loss.result(), train_accuracy.result()))

  ckpt_save_path = ckpt_manager.save()
  print('Saving checkpoint for epoch {} at {}'.format(epoch + 1, ckpt_save_path))
  print('Time taken for 1 epoch {} secs\n'.format(time.time() - start))

Start or epoch 1
Epoch 1 Batch 0 Loss 6.6433 Accuracy 0.0000
Epoch 1 Batch 50 Loss 6.4206 Accuracy 0.0143
Epoch 1 Batch 100 Loss 6.3457 Accuracy 0.0426
Epoch 1 Batch 150 Loss 6.2395 Accuracy 0.0522




































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































Epoch 1 Batch 200 Loss 6.1499 Accuracy 0.0569
Epoch 1 Batch 250 Loss 6.0423 Accuracy 0.0598
Epoch 1 Batch 300 Loss 5.9154 Accuracy 0.0618
Epoch 1 Batch 350 Loss 5.7831 Accuracy 0.0641
Epoch 1 Batch 400 Loss 5.6499 Accuracy 0.0705
Epoch 1 Batch 450 Loss 5.5249 Accuracy 0.0766
Epoch 1 Batch 500 Loss 5.4093 Accuracy 0.0820
Epoch 1 Batch 550 Loss 5.2986 Accuracy 0.0876
Epoch 1 Batch 600 Loss 5.1970 Accuracy 0.0932
Epoch 1 Batch 650 Loss 5.1016 Accuracy 0.0984
Epoch 1 Batch 700 Loss 5.0096 Accuracy 0.1034
Epoch 1 Batch 750 Loss 4.9260 Accuracy 0.1087
Epoch 1 Batch 800 Loss 4.8432 Accuracy 0.1141
Epoch 1 Batch 850 Loss 4.7705 Accuracy 0.1189
Epoch 1 Batch 900 Loss 4.6993 Accuracy 0.1234
Epoch 1 Batch 950 Loss 4.6312 Accuracy 0.1279
Epoch 1 Batch 1000 Loss 4.5677 Accuracy 0.1321
Epoch 1 Batch 1050 Loss 4.5061 Accuracy 0.1362
Epoch 1 Batch 1100 Loss 4.4449 Accuracy 0.1402
Epoch 1 Batch 1150 Loss 4.3872 Accuracy 0.1440
Epoch 1 Batch 1200 Loss 4.3317 Accuracy 0.1476
Epoch 1 Batch 1250 Loss 4.275

# Avaliação

In [None]:
text = 'you are smart'
text = [vocab_size_en - 2] + tokenizer_en.encode(text) + [vocab_size_en - 1]
text

[8191, 55, 17, 2202, 4099, 8192]

In [None]:
text = tf.expand_dims(text, axis=0)
text.shape

TensorShape([1, 6])

In [None]:
output = tf.expand_dims([vocab_size_pt - 2], axis = 0)
output.shape

TensorShape([1, 1])

In [None]:
def evaluate(inp_sentence):
  """Greedily decode the translation of `inp_sentence`.

  The sentence is tokenized and wrapped in the English start/end ids
  (vocab_size_en - 2 and vocab_size_en - 1). Decoding starts from the
  Portuguese start id and appends the arg-max token of the last position
  until the end id (vocab_size_pt - 1) is predicted or `max_length` tokens
  have been generated.

  Returns:
    1-D int tensor of generated ids, including the start id and excluding
    the end id.
  """
  start_en, end_en = vocab_size_en - 2, vocab_size_en - 1
  source_ids = [start_en] + tokenizer_en.encode(inp_sentence) + [end_en]
  enc_input = tf.expand_dims(source_ids, axis=0)

  output = tf.expand_dims([vocab_size_pt - 2], axis=0)

  for _ in range(max_length):
    # logits: (1, seq_length, vocab_size); only the last position is needed.
    logits = transformer(enc_input, output, False)
    next_id = tf.cast(tf.argmax(logits[:, -1:, :], axis=-1), tf.int32)

    if next_id == vocab_size_pt - 1:
      break

    output = tf.concat([output, next_id], axis=1)

  return tf.squeeze(output, axis=0)

In [None]:
def translate(sentence):
  """Translate `sentence` with `evaluate` and print input and prediction."""
  generated_ids = evaluate(sentence).numpy()

  # Ids from vocab_size_pt - 2 upwards are the start/end markers, which the
  # subword tokenizer cannot decode, so they are filtered out first.
  subword_ids = [token for token in generated_ids if token < vocab_size_pt - 2]
  predicted_sentence = tokenizer_pt.decode(subword_ids)

  print('Input: {}'.format(sentence))
  print('Predicted translation: {}'.format(predicted_sentence))


In [None]:
translate("this is a really powerful tool")

Input: this is a really powerful tool
Predicted translation: Isto é também um instrumento poderoso.


Melhorias

- Utilizar a base de dados completa
- Aumentar o tamanho da frase
- Mudar os parâmetros