# Práctica 9 - Embedding LSTM

> **Instituto Politécnico Nacional**
>
> **Centro de Investigación en Computación**
>
> 
> 
> Departamento de Diplomados y Extensión Profesional
> 
> Diplomado en Inteligencia Artificial
> 
> * Módulo 11 - Parte II
> 
> Profesor: Alan Badillo Salas (badillosalas@outlook.com)

In [15]:
from tensorflow import keras

# Number of reviews per batch, shared by every split.
batch_size = 32


def _load_split(directory):
    """Return a batched text dataset read from one split directory.

    The directory is expected to hold one sub-folder per class; labels are
    inferred from those sub-folders by `text_dataset_from_directory`.
    """
    return keras.utils.text_dataset_from_directory(directory, batch_size=batch_size)


# Same three splits, loaded in the same order: train, validation, test.
train_ds = _load_split("aclImdb/train")
val_ds = _load_split("aclImdb/val")
test_ds = _load_split("aclImdb/test")

Found 20000 files belonging to 2 classes.
Found 5000 files belonging to 2 classes.
Found 25000 files belonging to 2 classes.


In [16]:
def _drop_label(text, label):
    """Keep only the text element of a (text, label) pair."""
    return text


# Label-free view of the training split, used to fit the vocabulary.
text_only_train_ds = train_ds.map(_drop_label)

In [None]:
# Vocabulary cap passed to the vectorizer (and reused by the Embedding layer).
max_tokens = 500
# Every review is emitted as a sequence of exactly this many token ids
# (see Keras `output_sequence_length`).
max_length = 600

# Maps raw strings to fixed-length sequences of integer token ids.
text_vectorization = keras.layers.TextVectorization(
    output_mode="int",
    max_tokens=max_tokens,
    output_sequence_length=max_length,
)

# Fit the vocabulary on the training text only.
text_vectorization.adapt(text_only_train_ds)

2025-09-04 17:16:14.708014: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [18]:
def _to_token_ids(text, label):
    """Vectorize the text of a (text, label) pair, passing the label through."""
    return text_vectorization(text), label


# Apply the fitted vectorizer to every split with 4 parallel map calls.
int_train_ds = train_ds.map(_to_token_ids, num_parallel_calls=4)
int_val_ds = val_ds.map(_to_token_ids, num_parallel_calls=4)
int_test_ds = test_ds.map(_to_token_ids, num_parallel_calls=4)

In [19]:
# Variable-length sequences of integer token ids.
inputs = keras.Input(shape=(None,), dtype="int64")

# Learned 256-dimensional vector per token id; input_dim matches the
# vocabulary cap given to the vectorizer.
embedding_layer = keras.layers.Embedding(input_dim=max_tokens, output_dim=256)
embedded = embedding_layer(inputs)

# 32-unit LSTM wrapped in Bidirectional, followed by 50% dropout.
bidirectional_lstm = keras.layers.Bidirectional(keras.layers.LSTM(32))
x = bidirectional_lstm(embedded)
x = keras.layers.Dropout(0.5)(x)

# Single sigmoid unit: one probability for the two-class problem.
outputs = keras.layers.Dense(1, activation="sigmoid")(x)

model = keras.Model(inputs, outputs)
model.compile(
    loss="binary_crossentropy",
    optimizer="rmsprop",
    metrics=["accuracy"],
)

model.summary()

In [20]:
# Overwrite the checkpoint file only when the monitored quantity improves
# (save_best_only=True), so the file on disk holds the best epoch seen.
best_checkpoint = keras.callbacks.ModelCheckpoint(
    "resultados/embeddings_bidir_lstm.keras",
    save_best_only=True,
)
callbacks = [best_checkpoint]

# Train for 10 epochs, validating on the held-out validation split.
model.fit(
    int_train_ds,
    epochs=10,
    validation_data=int_val_ds,
    callbacks=callbacks,
)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 142ms/step - accuracy: 0.6401 - loss: 0.6287 - val_accuracy: 0.7086 - val_loss: 0.5592
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 138ms/step - accuracy: 0.7574 - loss: 0.5286 - val_accuracy: 0.8004 - val_loss: 0.4618
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 134ms/step - accuracy: 0.7950 - loss: 0.4751 - val_accuracy: 0.8210 - val_loss: 0.4138
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 133ms/step - accuracy: 0.8185 - loss: 0.4287 - val_accuracy: 0.8278 - val_loss: 0.3927
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 132ms/step - accuracy: 0.8255 - loss: 0.4130 - val_accuracy: 0.8380 - val_loss: 0.3875
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 135ms/step - accuracy: 0.8353 - loss: 0.3947 - val_accuracy: 0.8382 - val_loss: 0.4051
Epoch 7/10

<keras.src.callbacks.history.History at 0x348e34f10>

In [21]:
# Reload the checkpoint written during training. Keras' default safe mode is
# kept on: the architecture uses only built-in layers, so there is nothing
# that requires unsafe deserialization (safe_mode=False).
model = keras.models.load_model("resultados/embeddings_bidir_lstm.keras")

# evaluate() returns [loss, accuracy] because the model was compiled with a
# single "accuracy" metric; unpack by name instead of indexing with [1].
test_loss, test_acc = model.evaluate(int_test_ds)
print(f"Test acc: {test_acc:.3f}")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 34ms/step - accuracy: 0.8390 - loss: 0.3790
Test acc: 0.839
