In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np

def load_data():
    dataset_name = "ted_hrlr_translate/pt_to_en"
    examples, metadata = tfds.load(dataset_name, as_supervised=True, with_info=True)
    return examples["train"], examples["validation"]

def tokenize_text(text, tokenizer):
    return tokenizer.texts_to_sequences(text)

def prepare_data():
    train_data, val_data = load_data()
    tokenizer_pt = tfds.deprecated.text.SubwordTextEncoder.build_from_corpus(
        (pt.numpy() for pt, en in train_data), target_vocab_size=2**13)
    tokenizer_en = tfds.deprecated.text.SubwordTextEncoder.build_from_corpus(
        (en.numpy() for pt, en in train_data), target_vocab_size=2**13)
    return train_data, val_data, tokenizer_pt, tokenizer_en


In [2]:
def scaled_dot_product_attention(q, k, v, mask):
    matmul_qk = tf.matmul(q, k, transpose_b=True)
    depth = tf.cast(tf.shape(k)[-1], tf.float32)
    scaled_attention_logits = matmul_qk / tf.math.sqrt(depth)
    if mask is not None:
        scaled_attention_logits += (mask * -1e9)
    attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)
    output = tf.matmul(attention_weights, v)
    return output