# Práctica 8 - Bidirectional LSTM

> **Instituto Politécnico Nacional**
>
> **Centro de Investigación en Computación**
>
> 
> 
> Departamento de Diplomados y Extensión Profesional
> 
> Diplomado en Inteligencia Artificial
> 
> * Módulo 11 - Parte II
> 
> Profesor: Alan Badillo Salas (badillosalas@outlook.com)

In [11]:
from tensorflow import keras

# Number of reviews per batch, shared by every split.
batch_size = 32

# One directory per split; each is loaded with the same batch size.
split_dirs = ("aclImdb/train", "aclImdb/val", "aclImdb/test")

# Build the three datasets in train -> val -> test order.
train_ds, val_ds, test_ds = [
    keras.utils.text_dataset_from_directory(split_dir, batch_size=batch_size)
    for split_dir in split_dirs
]

Found 20000 files belonging to 2 classes.
Found 5000 files belonging to 2 classes.
Found 25000 files belonging to 2 classes.


In [12]:
# Drop the labels: the vectorizer only needs the raw review text to build its vocabulary.
text_only_train_ds = train_ds.map(lambda text, label: text)

In [None]:
max_length = 600  # value passed as output_sequence_length (tokens per review)
max_tokens = 500  # vocabulary size cap; also reused as the one-hot depth of the model

# Map raw strings to fixed-length sequences of integer token ids.
text_vectorization = keras.layers.TextVectorization(
    output_mode="int",
    output_sequence_length=max_length,
    max_tokens=max_tokens,
)

# Learn the vocabulary from the training texts only.
text_vectorization.adapt(text_only_train_ds)

2025-09-04 13:31:15.871118: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [14]:
def vectorize_batch(text, label):
    """Replace the raw text of a (text, label) batch with its integer token ids."""
    return text_vectorization(text), label


# Apply the same vectorization to every split, using 4 parallel calls.
int_train_ds = train_ds.map(vectorize_batch, num_parallel_calls=4)
int_val_ds = val_ds.map(vectorize_batch, num_parallel_calls=4)
int_test_ds = test_ds.map(vectorize_batch, num_parallel_calls=4)

In [23]:
import tensorflow as tf

@tf.keras.utils.register_keras_serializable(package="Custom")
class OneHotLayer(keras.layers.Layer):
    """Layer that one-hot encodes integer inputs with `tf.one_hot`.

    Registered as a Keras serializable under the "Custom" package, and
    `get_config` exposes `depth` so the layer can be rebuilt from its config.

    Args:
        depth: Size of the one-hot axis produced for each input id.
        **kwargs: Forwarded unchanged to `keras.layers.Layer`.
    """

    def __init__(self, depth, **kwargs):
        super().__init__(**kwargs)
        self.depth = depth

    def call(self, inputs):
        """Return `tf.one_hot(inputs, depth=self.depth)`."""
        one_hot = tf.one_hot(inputs, depth=self.depth)
        return one_hot

    def get_config(self):
        """Return the base layer config extended with this layer's `depth`."""
        return {**super().get_config(), "depth": self.depth}

In [24]:
# Variable-length sequences of int64 token ids.
inputs = keras.Input(shape=(None,), dtype="int64")

# One-hot encode every token id to a vector of size max_tokens,
# giving (batch, seq_len, max_tokens). An earlier version did this with a
# Lambda layer wrapping tf.one_hot; the registered OneHotLayer is used instead.
one_hot_encoder = OneHotLayer(depth=max_tokens)
embedded = one_hot_encoder(inputs)

# Bidirectional wrapper around an LSTM with 32 units, followed by dropout.
recurrent = keras.layers.Bidirectional(keras.layers.LSTM(32))
x = recurrent(embedded)
x = keras.layers.Dropout(0.5)(x)

# Single sigmoid unit for the binary classification output.
classifier = keras.layers.Dense(1, activation="sigmoid")
outputs = classifier(x)

model = keras.Model(inputs, outputs)
model.compile(optimizer="rmsprop", loss="binary_crossentropy", metrics=["accuracy"])

model.summary()

In [None]:
# Checkpoint callback configured with save_best_only=True, written to a single file.
best_model_checkpoint = keras.callbacks.ModelCheckpoint(
    "resultados/one_hot_bidir_lstm.keras",
    save_best_only=True,
)
callbacks = [best_model_checkpoint]

# Train for 10 epochs, validating on the held-out validation split.
model.fit(
    int_train_ds,
    validation_data=int_val_ds,
    epochs=10,
    callbacks=callbacks,
)

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 82ms/step - accuracy: 0.5694 - loss: 0.6742 - val_accuracy: 0.6432 - val_loss: 0.6526


<keras.src.callbacks.history.History at 0x34d6d7eb0>

In [26]:
# Reload the checkpoint saved during training and score it on the test split.
#
# max_tokens is deliberately NOT reassigned here: it is the vocabulary size the
# vectorizer and the model were built with, and rebinding it would leave stale
# state for any cell re-run afterwards.
#
# The default safe_mode is kept: the saved model contains no Lambda layer, only
# the OneHotLayer registered as a Keras serializable, so its defining cell must
# have been executed in this kernel before loading.
model = keras.models.load_model("resultados/one_hot_bidir_lstm.keras")

# evaluate() returns [loss, accuracy] for the metrics the model was compiled with.
test_metrics = model.evaluate(int_test_ds)
print(f"Test acc: {test_metrics[1]:.3f}")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 39ms/step - accuracy: 0.6390 - loss: 0.6570
Test acc: 0.639
