# Sentiment Analysis

In [None]:
import tensorflow_datasets as tfds
import tensorflow as tf

raw_train_set, raw_valid_set, raw_test_set = tfds.load(
    name="imdb_reviews",
    split=["train[:90%]", "train[90%:]", "test"],
    as_supervised=True
)

tf.random.set_seed(42)
train_set = raw_train_set.shuffle(5000, seed=42).batch(32).prefetch(1)
valid_set = raw_valid_set.batch(32).prefetch(1)
test_set = raw_test_set.batch(32).prefetch(1)



Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.M6UWMF_1.0.0/imdb_reviews-train.tfrecor…

Generating test examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.M6UWMF_1.0.0/imdb_reviews-test.tfrecord…

Generating unsupervised examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.M6UWMF_1.0.0/imdb_reviews-unsupervised.…

Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.


In [None]:
for review, label in raw_train_set.take(4):
  print(review.numpy().decode("utf-8")[:200], "...")
  print("Label: ", label.numpy())

This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting  ...
Label:  0
I have been known to fall asleep during films, but this is usually due to a combination of things including, really tired, being warm and comfortable on the sette and having just eaten a lot. However  ...
Label:  0
Mann photographs the Alberta Rocky Mountains in a superb fashion, and Jimmy Stewart and Walter Brennan give enjoyable performances as they always seem to do. <br /><br />But come on Hollywood - a Moun ...
Label:  0
This is the kind of film for a snowy Sunday afternoon when the rest of the world can go ahead with its own business as you descend into a big arm-chair and mellow for a couple of hours. Wonderful perf ...
Label:  1


In [None]:
vocab_size = 1000

text_vec_layer = tf.keras.layers.TextVectorization(max_tokens=vocab_size,
                                                   split = 'whitespace',
                                                   standardize="lower_and_strip_punctuation")
text_vec_layer.adapt(train_set.map(lambda review, label: review))

In [None]:
text_vec_layer.get_vocabulary()[:50]

['',
 '[UNK]',
 np.str_('the'),
 np.str_('and'),
 np.str_('a'),
 np.str_('of'),
 np.str_('to'),
 np.str_('is'),
 np.str_('in'),
 np.str_('it'),
 np.str_('i'),
 np.str_('this'),
 np.str_('that'),
 np.str_('br'),
 np.str_('was'),
 np.str_('as'),
 np.str_('for'),
 np.str_('with'),
 np.str_('movie'),
 np.str_('but'),
 np.str_('film'),
 np.str_('on'),
 np.str_('not'),
 np.str_('you'),
 np.str_('are'),
 np.str_('his'),
 np.str_('have'),
 np.str_('he'),
 np.str_('be'),
 np.str_('one'),
 np.str_('its'),
 np.str_('at'),
 np.str_('all'),
 np.str_('by'),
 np.str_('an'),
 np.str_('they'),
 np.str_('from'),
 np.str_('who'),
 np.str_('so'),
 np.str_('like'),
 np.str_('her'),
 np.str_('or'),
 np.str_('just'),
 np.str_('about'),
 np.str_('has'),
 np.str_('if'),
 np.str_('out'),
 np.str_('some'),
 np.str_('there'),
 np.str_('what')]

In [None]:
text_vec_layer.get_vocabulary()[-50:]

[np.str_('jane'),
 np.str_('brothers'),
 np.str_('battle'),
 np.str_('apart'),
 np.str_('mess'),
 np.str_('development'),
 np.str_('casting'),
 np.str_('potential'),
 np.str_('20'),
 np.str_('open'),
 np.str_('effect'),
 np.str_('christmas'),
 np.str_('expecting'),
 np.str_('dream'),
 np.str_('outside'),
 np.str_('remake'),
 np.str_('manages'),
 np.str_('forward'),
 np.str_('bill'),
 np.str_('twist'),
 np.str_('attempts'),
 np.str_('deserves'),
 np.str_('create'),
 np.str_('70s'),
 np.str_('unlike'),
 np.str_('imdb'),
 np.str_('dumb'),
 np.str_('powerful'),
 np.str_('portrayed'),
 np.str_('setting'),
 np.str_('missing'),
 np.str_('fire'),
 np.str_('fairly'),
 np.str_('scifi'),
 np.str_('inside'),
 np.str_('background'),
 np.str_('married'),
 np.str_('business'),
 np.str_('ben'),
 np.str_('mark'),
 np.str_('recently'),
 np.str_('fantasy'),
 np.str_('air'),
 np.str_('pay'),
 np.str_('gay'),
 np.str_('present'),
 np.str_('monster'),
 np.str_('fighting'),
 np.str_('joke'),
 np.str_('plain'

In [None]:
list(map(str, text_vec_layer.get_vocabulary()[:50]))

['',
 '[UNK]',
 'the',
 'and',
 'a',
 'of',
 'to',
 'is',
 'in',
 'it',
 'i',
 'this',
 'that',
 'br',
 'was',
 'as',
 'for',
 'with',
 'movie',
 'but',
 'film',
 'on',
 'not',
 'you',
 'are',
 'his',
 'have',
 'he',
 'be',
 'one',
 'its',
 'at',
 'all',
 'by',
 'an',
 'they',
 'from',
 'who',
 'so',
 'like',
 'her',
 'or',
 'just',
 'about',
 'has',
 'if',
 'out',
 'some',
 'there',
 'what']

In [None]:
text_vec_layer(['it was a great movie'])

<tf.Tensor: shape=(1, 5), dtype=int64, numpy=array([[ 9, 14,  4, 86, 18]])>

In [None]:
embed_layer = tf.keras.layers.Embedding(input_dim = vocab_size, output_dim = 64)
embed_layer(text_vec_layer(["it was a great movie"]))

<tf.Tensor: shape=(1, 5, 64), dtype=float32, numpy=
array([[[-3.2212771e-02, -2.3583686e-02,  4.3900013e-03, -3.7163317e-02,
         -1.2140773e-02,  4.5052771e-02,  2.8774962e-03,  6.3502565e-03,
         -2.4052486e-03,  4.1976657e-02,  2.9789057e-02, -3.0199898e-02,
          1.5198480e-02, -3.9034665e-02,  7.4714907e-03, -9.9413879e-03,
         -4.7572352e-02, -1.1720251e-02, -3.2492448e-02,  2.8531160e-02,
         -2.4245668e-02, -4.3790579e-02,  1.3221391e-03,  2.1503303e-02,
          4.0186536e-02,  6.4547881e-03,  3.5668816e-02,  3.8030516e-02,
          3.3851389e-02,  1.2797963e-02,  2.4193417e-02,  3.5799179e-02,
         -4.1638017e-03, -4.4524085e-02,  9.5183142e-03, -2.4888767e-02,
         -4.5868516e-02,  2.9601429e-02, -3.6479544e-02, -2.9595459e-02,
          2.4559129e-02,  3.1527910e-02,  3.5206508e-02, -2.8486736e-03,
         -1.0769866e-02,  3.9547827e-02, -3.9407920e-02, -4.8495878e-02,
          4.4018712e-02,  3.1819571e-02,  4.6544243e-02,  1.2316220e-03,

In [None]:
text_vec_layer

<TextVectorization name=text_vectorization, built=True>

In [None]:
embed_size = 128

model = tf.keras.Sequential([
    text_vec_layer,
    tf.keras.layers.Embedding(vocab_size, embed_size, mask_zero=True),
    tf.keras.layers.GRU(128),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [None]:
model.compile(
    loss='binary_crossentropy',
    optimizer = tf.keras.optimizers.Nadam(),
    metrics = ['accuracy']
)

model.fit(train_set, epochs=2, validation_data=valid_set)


Epoch 1/2
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m773s[0m 1s/step - accuracy: 0.6697 - loss: 0.5832 - val_accuracy: 0.8372 - val_loss: 0.3805
Epoch 2/2
[1m565/704[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m2:29[0m 1s/step - accuracy: 0.8568 - loss: 0.3444

In [None]:
embedding_weights = model.layers[1].get_weights()[0]

In [None]:
import numpy as np
np.savetxt("embeddings.tsv", embedding_weights, delimiter="\t")

In [None]:
vocab = text_vec_layer.get_vocabulary()

with open("metadata.tsv", "w", encoding="utf-8") as f:
  for word in vocab:
    word = word if word.strip() != "" else "<PAD>"
    f.write(f"{word}\n")

# Generating Shakespearean Text Using a Character RNN

In [None]:
import tensorflow as tf

shakespeare_url = "https://homl.info/shakespeare"
filepath = tf.keras.utils.get_file("shakespeare.txt", shakespeare_url)
with open(filepath) as f:
  shakespeare_text = f.read()

Downloading data from https://homl.info/shakespeare
[1m1115394/1115394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
print(shakespeare_text[:80])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.


In [None]:
''.join(sorted(set(shakespeare_text.lower())))

"\n !$&',-.3:;?abcdefghijklmnopqrstuvwxyz"

In [None]:
# Character-level tokenizer: every (lower-cased) character becomes one token.
text_vec_layer = tf.keras.layers.TextVectorization(split = 'character',
                                                   standardize = 'lower')

# Both adapt() and the layer call are given a batch of strings, so the corpus
# is wrapped in a one-element list and row 0 of the result is taken. The
# original `[shakespeare_text][0]` indexed the list *before* the call, which
# made the wrapper a no-op and passed a bare scalar string instead.
text_vec_layer.adapt([shakespeare_text])
encoded = text_vec_layer([shakespeare_text])[0]

In [None]:
encoded

<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([21,  7, 10, ..., 22, 28, 12])>

In [None]:
# Shift the ids down by 2 to drop the two reserved vocabulary slots
# (index 0 = padding, index 1 = [UNK]), which this character-level corpus
# does not use.
# The shifted ids are recomputed from the vectorizer output rather than with an
# in-place `encoded -= 2`, so re-running this cell cannot subtract the offset
# a second time.
encoded = text_vec_layer([shakespeare_text])[0] - 2
vocab_size = text_vec_layer.vocabulary_size() - 2
dataset_size = len(encoded)

In [None]:
vocab_size

39

In [None]:
dataset_size

1115394

In [None]:
def to_dataset(sequence, length, seed = None, shuffle = False, batch_size = 32):
  """Turn a 1-D id sequence into batches of (input, target) windows.

  Every window holds `length + 1` consecutive ids; the input is the window
  without its last id and the target is the window without its first id, so
  the target is the input shifted one step ahead.

  Args:
    sequence: 1-D sequence of token ids.
    length: number of time steps in each input/target window.
    seed: optional seed forwarded to the shuffle step.
    shuffle: when True, shuffle the windows with a 100,000-element buffer.
    batch_size: number of windows per batch.

  Returns:
    A prefetched `tf.data.Dataset` yielding `(inputs, targets)` batches.
  """
  window_size = length + 1
  windows = (
      tf.data.Dataset.from_tensor_slices(sequence)
      .window(window_size, shift = 1, drop_remainder = True)
      # each window is itself a small dataset; flatten it into one tensor
      .flat_map(lambda win: win.batch(window_size))
  )
  if shuffle:
    windows = windows.shuffle(100_000, seed = seed)

  def split_inputs_targets(batch):
    return batch[:, :-1], batch[:, 1:]

  return windows.batch(batch_size).map(split_inputs_targets).prefetch(1)

In [None]:
length = 100
tf.random.set_seed(42)
train_set = to_dataset(encoded[:1_000_000], length=length, shuffle = True,
                       seed = 42)
valid_set = to_dataset(encoded[1_000_000:1_060_000], length=length)
test_set = to_dataset(encoded[1_060_000:], length=length)

In [None]:
tf.random.set_seed(42)

# Char-RNN: embedding -> GRU emitting one output per time step -> softmax over
# the character vocabulary, i.e. a next-character prediction at every position.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=128),
    tf.keras.layers.GRU(128, return_sequences=True),
    tf.keras.layers.Dense(vocab_size, activation='softmax')
])

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='nadam',
    metrics=['accuracy']
    )

# Keep only the weights of the epoch with the best validation accuracy.
model_ckpt = tf.keras.callbacks.ModelCheckpoint(
    'my_shakespeare_model.keras',
    monitor='val_accuracy',
    save_best_only=True
)

# The checkpoint callback must be passed to fit(); it was previously created
# but never used, so no model file was ever saved.
history = model.fit(
    train_set,
    validation_data=valid_set,
    epochs=2,
    callbacks=[model_ckpt]
)

Epoch 1/2
  10558/Unknown [1m152s[0m 12ms/step - accuracy: 0.5342 - loss: 1.5474

KeyboardInterrupt: 

In [None]:
shakespeare_model = tf.keras.Sequential([
    text_vec_layer,
    tf.keras.layers.Lambda(lambda X: X - 2),
    model
])

In [None]:
y_proba = shakespeare_model.predict(tf.constant(['To be or not to b']))[0, -1]
y_pred = tf.argmax(y_proba)
text_vec_layer.get_vocabulary()[y_pred + 2]

# Generating Fake Shakespearean Text

In [None]:
log_probas = tf.math.log([[0.5, 0.3, 0.2]])
tf.random.categorical(log_probas, num_samples = 8)

In [None]:
def next_char(text, temperature = 1):
    """Sample the character that follows `text` from `shakespeare_model`.

    The predicted probabilities for the last time step are converted to
    log-probabilities and divided by `temperature` before sampling, so a
    lower temperature sharpens the distribution and a higher one flattens it.

    Args:
        text: the prompt string.
        temperature: divisor applied to the log-probabilities.

    Returns:
        The sampled character, looked up in the vectorizer's vocabulary.
    """
    batch = tf.constant([text])
    # [0, -1:] keeps a leading axis of size 1, as tf.random.categorical needs 2-D input
    last_step_probas = shakespeare_model.predict(batch)[0, -1:]
    scaled_logits = tf.math.log(last_step_probas) / temperature
    sampled = tf.random.categorical(scaled_logits, num_samples = 1)
    vocabulary = text_vec_layer.get_vocabulary()
    # +2 undoes the id shift that removed the two reserved vocabulary slots
    return vocabulary[sampled[0, 0] + 2]

In [None]:
def extent_text(text, chars = 50, temperature = 1):
    """Extend `text` by `chars` characters sampled one at a time.

    Each new character is drawn by `next_char` from everything generated so
    far, using the given sampling `temperature`.

    Returns:
        The original text followed by the generated characters.
    """
    generated = text
    for _step in range(chars):
        generated = generated + next_char(generated, temperature)
    return generated

In [None]:
extent_text('to be or not to b', chars = 100, temperature = 0.1)

# An Encoder-Decoder Network for Neural Machine Translation

In [None]:
from pathlib import Path
import tensorflow as tf

url = "https://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip"
path = tf.keras.utils.get_file("spa-eng.zip", origin=url, cache_dir="datasets", extract=True)

# Final corrected path
spa_txt_path = Path(path).parent / "spa-eng_extracted" / "spa-eng" / "spa.txt"

# Read the file
text = spa_txt_path.read_text(encoding='utf-8')
print(text[:500])  # Print first 500 characters as a quick check

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip
[1m2638744/2638744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Go.	Ve.
Go.	Vete.
Go.	Vaya.
Go.	Váyase.
Hi.	Hola.
Run!	¡Corre!
Run.	Corred.
Who?	¿Quién?
Fire!	¡Fuego!
Fire!	¡Incendio!
Fire!	¡Disparad!
Help!	¡Ayuda!
Help!	¡Socorro! ¡Auxilio!
Help!	¡Auxilio!
Jump!	¡Salta!
Jump.	Salte.
Stop!	¡Parad!
Stop!	¡Para!
Stop!	¡Pare!
Wait!	¡Espera!
Wait.	Esperen.
Go on.	Continúa.
Go on.	Continúe.
Hello!	Hola.
I ran.	Corrí.
I ran.	Corría.
I try.	Lo intento.
I won!	¡He ganado!
Oh no!	¡Oh, no!
Relax.	Tomátelo con soda.
Smile.	Sonríe.
Attack!	¡Al ataque!
Attack!	¡Atacad!
Ge


In [None]:
import numpy as np

text = text.replace("¡", "").replace("¿", "")
pairs = [line.split("\t") for line in text.splitlines()]
np.random.seed(42)
np.random.shuffle(pairs)
sentences_en, sentences_es = zip(*pairs)

In [None]:
for i in range(3):
    print(sentences_en[i], "=>", sentences_es[i])

How boring! => Qué aburrimiento!
I love sports. => Adoro el deporte.
Would you like to swap jobs? => Te gustaría que intercambiemos los trabajos?


In [None]:
vocab_size = 1000
max_length = 50
text_vec_layer_en = tf.keras.layers.TextVectorization(
    vocab_size, output_sequence_length = max_length)
text_vec_layer_es = tf.keras.layers.TextVectorization(
    vocab_size, output_sequence_length = max_length)
text_vec_layer_en.adapt(sentences_en)
text_vec_layer_es.adapt([f"startofseq {s} endofseq" for s in sentences_es])

In [None]:
text_vec_layer_en.get_vocabulary()[:10]

['',
 '[UNK]',
 np.str_('the'),
 np.str_('i'),
 np.str_('to'),
 np.str_('you'),
 np.str_('tom'),
 np.str_('a'),
 np.str_('is'),
 np.str_('he')]

In [None]:
text_vec_layer_es.get_vocabulary()[:10]

['',
 '[UNK]',
 np.str_('startofseq'),
 np.str_('endofseq'),
 np.str_('de'),
 np.str_('que'),
 np.str_('a'),
 np.str_('no'),
 np.str_('tom'),
 np.str_('la')]

In [None]:
X_train = tf.constant(sentences_en[:100_000])
X_valid = tf.constant(sentences_en[100_000:])
X_train_dec = tf.constant([f"startofseq {s}" for s in sentences_es[:100_000]])
X_valid_dec = tf.constant([f"startofseq {s}" for s in sentences_es[100_000:]])
Y_train = text_vec_layer_es([f"{s} endofseq" for s in sentences_es[:100_000]])
Y_valid = text_vec_layer_es([f"{s} endofseq" for s in sentences_es[100_000:]])

In [None]:
tf.random.set_seed(42)
encoder_inputs = tf.keras.layers.Input(shape = [], dtype = tf.string)
decoder_inputs = tf.keras.layers.Input(shape = [], dtype = tf.string)

In [None]:
embed_size = 128

encoder_input_ids = text_vec_layer_en(encoder_inputs)
decoder_input_ids = text_vec_layer_es(decoder_inputs)

encoder_embedding_layer = tf.keras.layers.Embedding(vocab_size,
                                                    output_dim = embed_size,
                                                    mask_zero = True)

decoder_embedding_layer = tf.keras.layers.Embedding(vocab_size,
                                                    output_dim = embed_size,
                                                    mask_zero = True)

encoder_embeddings = encoder_embedding_layer(encoder_input_ids)
decoder_embeddings = decoder_embedding_layer(decoder_input_ids)

In [None]:
# return_state=True makes the LSTM return its final states after its output;
# the states are collected into `encoder_state` to initialise the decoder.
encoder = tf.keras.layers.LSTM(512, return_state = True)
encoder_outputs, *encoder_state = encoder(encoder_embeddings)

In [None]:
decoder = tf.keras.layers.LSTM(512, return_sequences = True)
decoder_outputs = decoder(decoder_embeddings, initial_state = encoder_state)

In [None]:
output_layer = tf.keras.layers.Dense(vocab_size, activation = 'softmax')
Y_proba = output_layer(decoder_outputs)

In [None]:
model = tf.keras.Model(inputs = [encoder_inputs, decoder_inputs],
                       outputs = [Y_proba])

model.compile(loss = 'sparse_categorical_crossentropy',
              optimizer = 'nadam',
              metrics = ['accuracy'])

model.fit((X_train, X_train_dec), Y_train, epochs = 3,
          validation_data = ((X_valid, X_valid_dec), Y_valid))

Epoch 1/3
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 23ms/step - accuracy: 0.0518 - loss: 3.4867 - val_accuracy: 0.0748 - val_loss: 2.1288
Epoch 2/3
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 24ms/step - accuracy: 0.0793 - loss: 1.9539 - val_accuracy: 0.0874 - val_loss: 1.6101
Epoch 3/3
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 22ms/step - accuracy: 0.0915 - loss: 1.4664 - val_accuracy: 0.0928 - val_loss: 1.4140


<keras.src.callbacks.history.History at 0x7dcde59fc510>

In [None]:
def translate(sentence_en):
    """Greedily translate an English sentence with the global `model`.

    The decoder input starts as "startofseq" and grows by one predicted word
    per step; decoding stops when "endofseq" is predicted or after
    `max_length` steps.

    Args:
        sentence_en: the English sentence to translate.

    Returns:
        The predicted translation as a space-separated string.
    """
    # Loop-invariant work hoisted out of the decoding loop: the encoder input
    # never changes and the vocabulary list is costly to rebuild on each step.
    X = tf.constant([sentence_en])
    vocab = text_vec_layer_es.get_vocabulary()

    translation = ""
    for word_idx in range(max_length):
        X_dec = tf.constant(['startofseq' + translation])
        # probabilities for the word at position `word_idx` of the output
        y_proba = model.predict((X, X_dec))[0, word_idx]
        predicted_word = vocab[np.argmax(y_proba)]
        if predicted_word == 'endofseq':
            break
        translation += " " + predicted_word
    return translation.strip()

In [None]:
translate("I like soccer")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 495ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step


'me gusta el fútbol'

In [None]:
translate("I like soccer and going to the beach")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


'me gusta jugar al aeropuerto'

# Bidirectional RNNs

In [None]:
import tensorflow as tf

In [None]:
tf.random.set_seed(42)
encoder = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(256, return_state = (True))
)

In [None]:
# encoder_outputs, *encoder_state = encoder(encoder_embeddings)
# encoder_state = [tf.concat(encoder_state[::2], axis = -1), #short-term ()
#                 tf.concat(encoder_state[1::2], axis = -1)]  #long-term (1 & 3)

In [None]:
class ConcatenateStates(tf.keras.layers.Layer):
    """Merge the states of a bidirectional LSTM into a single [h, c] pair.

    Expects a list of four state tensors and concatenates the even-indexed
    ones and the odd-indexed ones along the last axis, so the result can be
    used as `initial_state` of a unidirectional LSTM twice as wide.
    NOTE(review): assumes the order is [forward_h, forward_c, backward_h,
    backward_c], as returned by Keras' Bidirectional layer; confirm.
    """

    def __init__(self, **kwargs):
        # Forward standard layer arguments (name, dtype, trainable, ...) so the
        # layer can be named and re-created like any other Keras layer.
        super().__init__(**kwargs)

    def call(self, encoder_state):
        return [tf.concat(encoder_state[::2], axis = -1),   # short-term (0 & 2)
                tf.concat(encoder_state[1::2], axis = -1)]  # long-term (1 & 3)

encoder_outputs, *encoder_state = encoder(encoder_embeddings)
concat_states = ConcatenateStates()
encoder_state = concat_states(encoder_state)

07/05/2025

In [None]:
decoder = tf.keras.layers.LSTM(512, return_sequences=True)
decoder_outputs = decoder(decoder_embeddings, initial_state=encoder_state)

output_layer = tf.keras.layers.Dense(vocab_size, activation='softmax')
Y_proba = output_layer(decoder_outputs)

model = tf.keras.Model(inputs=[encoder_inputs, decoder_inputs], outputs=[Y_proba])

model.compile(loss='sparse_categorical_crossentropy',
              optimizer='nadam',
              metrics=['accuracy'])

model.fit((X_train, X_train_dec), Y_train, epochs=3,
          validation_data=((X_valid, X_valid_dec), Y_valid))

Epoch 1/3
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 26ms/step - accuracy: 0.0646 - loss: 2.8415 - val_accuracy: 0.0891 - val_loss: 1.5488
Epoch 2/3
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 25ms/step - accuracy: 0.0926 - loss: 1.4379 - val_accuracy: 0.0950 - val_loss: 1.3284
Epoch 3/3
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 26ms/step - accuracy: 0.1001 - loss: 1.1653 - val_accuracy: 0.0966 - val_loss: 1.2692


<keras.src.callbacks.history.History at 0x7dcdd04bcb10>

In [None]:
translate('I like soccer')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 452ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


'me gusta el fútbol'

In [None]:
translate('I like cats and dogs')

# Beam Search

In [None]:
def beam_search(sentence_en, beam_width, verbose=False):
    """Translate `sentence_en` with beam search over the global `model`.

    At every step each unfinished candidate is extended with every vocabulary
    word, candidates are ranked by summed log-probability, and only the
    `beam_width` best are kept. Candidates ending in "endofseq" are carried
    over unchanged.

    Args:
        sentence_en: the English sentence to translate.
        beam_width: number of candidate translations kept per step.
        verbose: when True, print the candidates kept after each step.

    Returns:
        The best-scoring translation with the "endofseq" marker removed. If
        `max_length` steps pass before every candidate is finished, the best
        candidate found so far is returned (previously this returned None).
    """
    # Hoisted: the vocabulary used to be rebuilt for every candidate word in
    # the inner loop, and the encoder input is the same for every prediction.
    vocab = text_vec_layer_es.get_vocabulary()
    X = tf.constant([sentence_en])  # encoder input

    X_dec = tf.constant(["startofseq"])  # decoder input
    y_proba = model.predict((X, X_dec))[0, 0]  # first token's probas
    top_k = tf.math.top_k(y_proba, k=beam_width)
    top_translations = [  # list of best (log_proba, translation)
        (np.log(word_proba), vocab[word_id])
        for word_proba, word_id in zip(top_k.values, top_k.indices)
    ]

    # extra code – displays the top first words in verbose mode
    if verbose:
        print("Top first words:", top_translations)

    for idx in range(1, max_length):
        candidates = []
        for log_proba, translation in top_translations:
            if translation.endswith("endofseq"):
                candidates.append((log_proba, translation))
                continue  # translation is finished, so don't try to extend it
            X_dec = tf.constant(["startofseq " + translation])  # decoder input
            y_proba = model.predict((X, X_dec))[0, idx]  # last token's proba
            for word_id, word_proba in enumerate(y_proba):
                candidates.append((log_proba + np.log(word_proba),
                                   f"{translation} {vocab[word_id]}"))
        top_translations = sorted(candidates, reverse=True)[:beam_width]

        # extra code – displays the top translation so far in verbose mode
        if verbose:
            print("Top translations so far:", top_translations)

        if all([tr.endswith("endofseq") for _, tr in top_translations]):
            return top_translations[0][1].replace("endofseq", "").strip()

    # Length limit reached with unfinished candidates: return the best one
    # instead of implicitly returning None.
    return top_translations[0][1].replace("endofseq", "").strip()

In [None]:
sentence_en = 'I like cats and dogs'
translate(sentence_en)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


'me gustan los perros y los gatos'

In [None]:
beam_search(sentence_en, beam_width=3, verbose=True)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Top first words: [(np.float32(-0.082366034), np.str_('me')), (np.float32(-4.390876), np.str_('soy')), (np.float32(-4.577242), np.str_('les'))]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Top translations so far: [(np.float32(-0.22111398), 'me gustan'), (np.float32(-2.1524446), 'me gusta'), (np.float32(-4.629883), 'soy como')]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Top translations so far: [(np.float32(-0.32862926), 'me gustan los'), (np.float32(-2.6506011), 'me gusta tanto'), (np.float32(-2.9801757), 'me gustan las')]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

'me gustan los perros y los gatos'

# Attention Mechanisms

In [None]:
tf.random.set_seed(42)
encoder_inputs = tf.keras.layers.Input(shape=[], dtype=tf.string)
decoder_inputs = tf.keras.layers.Input(shape=[], dtype=tf.string)


In [None]:
embed_size = 128

encoder_input_ids = text_vec_layer_en(encoder_inputs)
decoder_input_ids = text_vec_layer_es(decoder_inputs)

encoder_embedding_layer = tf.keras.layers.Embedding(vocab_size,
                                                    output_dim=embed_size)

decoder_embedding_layer = tf.keras.layers.Embedding(vocab_size,
                                                    output_dim=embed_size)

encoder_embeddings = encoder_embedding_layer(encoder_input_ids)
decoder_embeddings = decoder_embedding_layer(decoder_input_ids)

In [None]:
encoder = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(256, return_sequences=True, return_state=True)
)
encoder_outputs, *encoder_state = encoder(encoder_embeddings)
encoder_state = concat_states(encoder_state)

In [None]:
decoder = tf.keras.layers.LSTM(512, return_sequences=True)
decoder_outputs = decoder(decoder_embeddings, initial_state = encoder_state)

In [None]:
attention_layer = tf.keras.layers.Attention()
attention_outputs = attention_layer([decoder_outputs, encoder_outputs])

output_layer = tf.keras.layers.Dense(vocab_size, activation='softmax')
Y_proba = output_layer(attention_outputs)

model = tf.keras.Model(inputs = [encoder_inputs, decoder_inputs],
                       outputs=[Y_proba])

model.compile(loss='sparse_categorical_crossentropy', optimizer = 'nadam',
              metrics=['accuracy'])

model.fit((X_train, X_train_dec), Y_train, epochs=3, validation_data=((X_valid, X_valid_dec), Y_valid))

Epoch 1/3
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 29ms/step - accuracy: 0.8876 - loss: 0.7189 - val_accuracy: 0.9085 - val_loss: 0.4844
Epoch 2/3
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 27ms/step - accuracy: 0.9139 - loss: 0.4453 - val_accuracy: 0.9280 - val_loss: 0.3321
Epoch 3/3
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 28ms/step - accuracy: 0.9317 - loss: 0.3092 - val_accuracy: 0.9401 - val_loss: 0.2571


<keras.src.callbacks.history.History at 0x7dcd65b500d0>

In [None]:
translate('I love cats and dogs')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 415ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


'me gustan los gatos y los gatos'

In [None]:
translate('I love soccer and also going to the beach')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


'voy a jugar a jugar al aeropuerto a jugar al aeropuerto'

In [None]:
translate('I love going to the beach')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step


'voy a ir a la playa'

# Transformer model

In [None]:
import tensorflow as tf

vocab_size = 10000
max_length = 50
embed_size = 128
num_heads = 5
ff_dim = 512


# Input layers
encoder_inputs = tf.keras.Input(shape=(None, ), dtype = tf.int32, name='encoder_inputs')
decoder_inputs = tf.keras.Input(shape=(None, ), dtype = tf.int32, name='decoder_inputs')


# Embedding layer
encoder_embedding_layer = tf.keras.layers.Embedding(vocab_size,
                                                    output_dim = embed_size,
                                                    mask_zero = True)


decoder_embedding_layer = tf.keras.layers.Embedding(vocab_size,
                                                    output_dim = embed_size,
                                                    mask_zero = True)

encoder_embeddings = encoder_embedding_layer(encoder_inputs)
decoder_embeddings = decoder_embedding_layer(decoder_inputs)


# Positional Embedding
pos_embedding_layer = tf.keras.layers.Embedding(max_length, embed_size)
positions_encoder = tf.keras.layers.Lambda(lambda x: tf.range(start=0, limit = tf.shape(x)[1], delta=1))(encoder_inputs)
positions_decoder = tf.keras.layers.Lambda(lambda x: tf.range(start=0, limit = tf.shape(x)[1], delta=1))(decoder_inputs)
pos_embed_enc = pos_embedding_layer(positions_encoder)
pos_embed_dec = pos_embedding_layer(positions_decoder)



# Adding positions and token embeddings
encoder_embed = encoder_embeddings + pos_embed_enc
decoder_embed = decoder_embeddings + pos_embed_dec



# Encoder self-attention
encoder_attention = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_size)(encoder_embed, encoder_embed)
encoder_attention = tf.keras.layers.LayerNormalization(epsilon=1e-6)(encoder_embed + encoder_attention)


# Encoder feed-forward
encoder_ff = tf.keras.layers.Dense(ff_dim, activation='relu')(encoder_attention)
encoder_ff = tf.keras.layers.Dense(embed_size)(encoder_ff)
encoder_outputs = tf.keras.layers.LayerNormalization(epsilon=1e-6)(encoder_attention + encoder_ff)


# Decoder self-attention
causal_mask = tf.keras.layers.Lambda(
    lambda x: tf.linalg.band_part(tf.ones((tf.shape(x)[1], tf.shape(x)[1])), -1, 0)
)(decoder_inputs)

decoder_attention = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, \
                                                       key_dim=embed_size)(decoder_embed, decoder_embed, attention_mask=causal_mask)
decoder_attention = tf.keras.layers.LayerNormalization(epsilon=1e-6)(decoder_embed + decoder_attention)



# Encoder Decoder cross-attention
cross_attention = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_size)(decoder_attention, encoder_outputs, encoder_outputs)
decoder_cross = tf.keras.layers.LayerNormalization(epsilon=1e-6)(decoder_attention + cross_attention)


# Decoder feed-forward
decoder_ff = tf.keras.layers.Dense(ff_dim, activation='relu')(decoder_cross)
decoder_ff = tf.keras.layers.Dense(embed_size)(decoder_ff)
decoder_outputs = tf.keras.layers.LayerNormalization(epsilon=1e-6)(decoder_cross + decoder_ff)


# Final output-layer
output_logits = tf.keras.layers.Dense(vocab_size, activation='softmax')(decoder_outputs)
transformer = tf.keras.Model([encoder_inputs, decoder_inputs], output_logits)

In [None]:
transformer.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='nadam',
    metrics=['accuracy']
)

In [None]:
vocab_size = 10000
max_length = 50

text_vec_layer_en = tf.keras.layers.TextVectorization(
    vocab_size, output_sequence_length = max_length,
    pad_to_max_tokens = True
)

text_vec_layer_es = tf.keras.layers.TextVectorization(
    vocab_size, output_sequence_length = max_length,
    pad_to_max_tokens = True
)

text_vec_layer_en.adapt(sentences_en)
text_vec_layer_es.adapt([f'startofseq {s} endofseq' for s in sentences_es])

In [None]:
def _to_padded_ids(vectorizer, texts):
    """Tokenize `texts` with `vectorizer` and post-pad/truncate to `max_length` ids."""
    token_ids = vectorizer(texts).numpy()
    padded = tf.keras.preprocessing.sequence.pad_sequences(
        token_ids, padding='post', maxlen=max_length
    )
    return tf.constant(padded)


# English sentences feed the encoder; Spanish sentences feed the decoder.
X_train_padded = _to_padded_ids(text_vec_layer_en, X_train)
X_train_dec_padded = _to_padded_ids(text_vec_layer_es, X_train_dec)
X_valid_padded = _to_padded_ids(text_vec_layer_en, X_valid)
# Fix: the validation decoder inputs are Spanish and must use the Spanish
# vectorizer (they were previously tokenized with the English one, which made
# the validation metrics meaningless).
X_valid_dec_padded = _to_padded_ids(text_vec_layer_es, X_valid_dec)

# Targets are re-tokenized with the current Spanish vectorizer so that their
# ids share the vocabulary of the decoder inputs and of the output layer;
# Y_train / Y_valid were built earlier with the previous, smaller vectorizer.
Y_train_tfm = text_vec_layer_es([f"{s} endofseq" for s in sentences_es[:100_000]])
Y_valid_tfm = text_vec_layer_es([f"{s} endofseq" for s in sentences_es[100_000:]])

transformer.fit(
    (X_train_padded, X_train_dec_padded),
    Y_train_tfm,
    epochs=3,
    validation_data=((X_valid_padded, X_valid_dec_padded), Y_valid_tfm)
)


Epoch 1/3
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 21ms/step - accuracy: 0.9065 - loss: 0.8495 - val_accuracy: 0.8860 - val_loss: 0.9589
Epoch 2/3
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 21ms/step - accuracy: 0.9532 - loss: 0.2295 - val_accuracy: 0.8845 - val_loss: 1.0541
Epoch 3/3
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 21ms/step - accuracy: 0.9615 - loss: 0.1655 - val_accuracy: 0.8870 - val_loss: 1.0365


<keras.src.callbacks.history.History at 0x7ccf0b7b8090>

In [None]:
import numpy as np

def translate(sentence_en):
    """Greedily translate an English sentence with the global `transformer`.

    This definition replaces the RNN-based `translate` defined earlier in the
    notebook. The decoder input starts with the "startofseq" id and grows by
    one predicted id per step; decoding stops when the "endofseq" id is
    predicted or after `max_length` steps.

    Args:
        sentence_en: the English sentence to translate.

    Returns:
        The predicted translation as a space-separated string.
    """
    # Tokenize and pad encoder input
    X = text_vec_layer_en(tf.constant([sentence_en]))
    X = tf.keras.preprocessing.sequence.pad_sequences(X.numpy(), padding="post", maxlen=max_length)

    # Special-token ids as plain Python ints, so the decoder id list holds a
    # single type and the end-of-sequence test is an ordinary int comparison
    # (they used to be EagerTensors mixed with NumPy ints).
    start_token = int(text_vec_layer_es(['startofseq'])[0][0])
    end_token = int(text_vec_layer_es(['endofseq'])[0][0])
    vocab = text_vec_layer_es.get_vocabulary()

    # Decoder input initialized with just the start token
    decoder_input = [start_token]

    for _ in range(max_length):
        decoder_input_padded = tf.keras.preprocessing.sequence.pad_sequences(
            [decoder_input], maxlen=max_length, padding="post"
        )

        # read the prediction made at the position of the last real token
        y_proba = transformer.predict((X, decoder_input_padded), verbose=0)[0, len(decoder_input) - 1]
        predicted_word_id = int(np.argmax(y_proba))

        if predicted_word_id == end_token:
            break

        decoder_input.append(predicted_word_id)

    # Map tokens back to words, skipping the start token
    return ' '.join(vocab[token] for token in decoder_input[1:])

In [None]:
print(translate('I like cats and dogs'))

me gustan los gatos y los perros


In [None]:
print(translate('Yesterday I had a steak for lunch'))

ayer tuve un filete para almorzar


In [None]:
print(translate('Tomorrow I will go to the World Cup Finals'))

mañana me iré a la taza de semana
