# 3. LSTMs in TensorFlow


In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TextVectorization, Embedding, LSTM, Dense, Bidirectional, Dropout
from tensorflow.keras.regularizers import L1, L2, L1L2
from tensorflow.keras.optimizers import Adam, RMSprop

In [None]:
imdb_train = tfds.load(name="imdb_reviews", split="train")
imdb_train_df = tfds.as_dataframe(imdb_train)
imdb_train_df['text'] = imdb_train_df['text'].str.decode('utf-8')

imdb_test = tfds.load(name="imdb_reviews", split="test")
imdb_test_df = tfds.as_dataframe(imdb_test)
imdb_test_df['text'] = imdb_test_df['text'].str.decode('utf-8')

X_train = imdb_train_df['text']
y_train = imdb_train_df['label']
X_test = imdb_test_df['text']
y_test = imdb_test_df['label']

In [None]:
max_tokens = 20000
output_sequence_length = 256
output_dim = 128

vectorizer_layer = TextVectorization(max_tokens=max_tokens,
                                     output_mode='int',
                                     standardize='lower_and_strip_punctuation',
                                     ngrams=1,
                                     output_sequence_length=output_sequence_length)
vectorizer_layer.adapt(X_train)

embedding_layer = Embedding(input_dim=max_tokens,
                            output_dim=output_dim,
                            input_length=output_sequence_length)

In [None]:
simple_lstm = Sequential()
simple_lstm.add(Input(shape=(1,), dtype=tf.string))
simple_lstm.add(vectorizer_layer)
simple_lstm.add(embedding_layer)
simple_lstm.add(LSTM(128))
simple_lstm.add(Dense(64, activation='relu'))
simple_lstm.add(Dense(1, activation='sigmoid'))

simple_lstm.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, 256)              0         
 torization)                                                     
                                                                 
 embedding (Embedding)       (None, 256, 128)          2560000   
                                                                 
 lstm (LSTM)                 (None, 128)               131584    
                                                                 
 dense (Dense)               (None, 64)                8256      
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
Total params: 2,699,905
Trainable params: 2,699,905
Non-trainable params: 0
______________________________________________

In [None]:
opt = Adam(learning_rate=0.001)
simple_lstm.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
simple_lstm.fit(X_train, y_train, epochs=5)

test_loss, test_acc = simple_lstm.evaluate(X_test, y_test)
print(f"Test set accuracy: {test_acc}")

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test set accuracy: 0.8588799834251404


# 4. Single-layer Bidirectional LSTMs

In [None]:
sl_bi_lstm = Sequential()
sl_bi_lstm.add(Input(shape=(1,), dtype=tf.string))
sl_bi_lstm.add(vectorizer_layer)
sl_bi_lstm.add(embedding_layer)
sl_bi_lstm.add(Bidirectional(LSTM(128)))
sl_bi_lstm.add(Dense(64, activation='relu', kernel_regularizer=L1L2(0.005)))
sl_bi_lstm.add(Dense(32, activation='relu', kernel_regularizer=L2(0.005)))
sl_bi_lstm.add(Dense(16, activation='relu', kernel_regularizer=L2(0.01)))
sl_bi_lstm.add(Dense(1, activation='sigmoid'))

sl_bi_lstm.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, 256)              0         
 torization)                                                     
                                                                 
 embedding (Embedding)       (None, 256, 128)          2560000   
                                                                 
 bidirectional (Bidirectiona  (None, 256)              263168    
 l)                                                              
                                                                 
 dense_2 (Dense)             (None, 64)                16448     
                                                                 
 dense_3 (Dense)             (None, 32)                2080      
                                                                 
 dense_4 (Dense)             (None, 16)               

In [None]:
opt = Adam(learning_rate=0.001)
sl_bi_lstm.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
sl_bi_lstm.fit(X_train, y_train, epochs=3)

test_loss, test_acc = sl_bi_lstm.evaluate(X_test, y_test)
print(f"Test set accuracy: {test_acc}")

Epoch 1/3
Epoch 2/3
Epoch 3/3
Test set accuracy: 0.8566799759864807


# 5. Multilayer Bidirectional LSTMs

In [None]:
ml_bi_lstm = Sequential()
ml_bi_lstm.add(Input(shape=(1,), dtype=tf.string))
ml_bi_lstm.add(vectorizer_layer)
ml_bi_lstm.add(embedding_layer)
ml_bi_lstm.add(Bidirectional(LSTM(4, return_sequences=True)))
ml_bi_lstm.add(Bidirectional(LSTM(4, return_sequences=True)))
ml_bi_lstm.add(Bidirectional(LSTM(4)))
ml_bi_lstm.add(Dense(8, activation='elu', kernel_regularizer=L1L2(0.1)))
ml_bi_lstm.add(Dense(8, activation='elu', kernel_regularizer=L2(0.1)))
ml_bi_lstm.add(Dense(8, activation='elu', kernel_regularizer=L2(0.05)))
ml_bi_lstm.add(Dense(1, activation='sigmoid'))

ml_bi_lstm.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, 256)              0         
 torization)                                                     
                                                                 
 embedding (Embedding)       (None, 256, 128)          2560000   
                                                                 
 bidirectional_1 (Bidirectio  (None, 256, 8)           4256      
 nal)                                                            
                                                                 
 bidirectional_2 (Bidirectio  (None, 256, 8)           416       
 nal)                                                            
                                                                 
 bidirectional_3 (Bidirectio  (None, 8)                416       
 nal)                                                 

In [None]:
opt = RMSprop(learning_rate=0.0001, rho=0.8, momentum=0.9)
ml_bi_lstm.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
ml_bi_lstm.fit(X_train, y_train, epochs=3)

test_loss, test_acc = ml_bi_lstm.evaluate(X_test, y_test)
print(f"Test set accuracy: {test_acc}")

Epoch 1/3
Epoch 2/3
Epoch 3/3
Test set accuracy: 0.8012800216674805


# 6. Hyperparameter Tuning


In [None]:
ml_bi_lstm_sum = Sequential()
ml_bi_lstm_sum.add(Input(shape=(1,), dtype=tf.string))
ml_bi_lstm_sum.add(vectorizer_layer)
ml_bi_lstm_sum.add(embedding_layer)
ml_bi_lstm_sum.add(Bidirectional(LSTM(8, return_sequences=True), merge_mode='sum'))
ml_bi_lstm_sum.add(Bidirectional(LSTM(4)))
ml_bi_lstm_sum.add(Dense(16, activation='relu'))
ml_bi_lstm_sum.add(Dropout(0.5))
ml_bi_lstm_sum.add(Dense(1, activation='sigmoid'))

ml_bi_lstm_sum.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, 256)              0         
 torization)                                                     
                                                                 
 embedding (Embedding)       (None, 256, 128)          2560000   
                                                                 
 bidirectional_4 (Bidirectio  (None, 256, 8)           8768      
 nal)                                                            
                                                                 
 bidirectional_5 (Bidirectio  (None, 8)                416       
 nal)                                                            
                                                                 
 dense_10 (Dense)            (None, 16)                144       
                                                      

In [None]:
opt = Adam(learning_rate=0.001)
ml_bi_lstm_sum.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
ml_bi_lstm_sum.fit(X_train, y_train, epochs=2)

test_loss, test_acc = ml_bi_lstm_sum.evaluate(X_test, y_test)
print(f"Test set accuracy: {test_acc}")

Epoch 1/2
Epoch 2/2
Test set accuracy: 0.8102399706840515


In [None]:
ml_bi_lstm_ave = Sequential()
ml_bi_lstm_ave.add(Input(shape=(1,), dtype=tf.string))
ml_bi_lstm_ave.add(vectorizer_layer)
ml_bi_lstm_ave.add(embedding_layer)
ml_bi_lstm_ave.add(Bidirectional(LSTM(8, return_sequences=True), merge_mode='ave'))
ml_bi_lstm_ave.add(Bidirectional(LSTM(4)))
ml_bi_lstm_ave.add(Dense(16, activation='elu'))
ml_bi_lstm_ave.add(Dropout(0.5))
ml_bi_lstm_ave.add(Dense(1, activation='sigmoid'))

ml_bi_lstm_ave.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, 256)              0         
 torization)                                                     
                                                                 
 embedding (Embedding)       (None, 256, 128)          2560000   
                                                                 
 bidirectional_6 (Bidirectio  (None, 256, 8)           8768      
 nal)                                                            
                                                                 
 bidirectional_7 (Bidirectio  (None, 8)                416       
 nal)                                                            
                                                                 
 dense_12 (Dense)            (None, 16)                144       
                                                      

In [None]:
opt = Adam(learning_rate=0.001)
ml_bi_lstm_ave.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
ml_bi_lstm_ave.fit(X_train, y_train, epochs=2)

test_loss, test_acc = ml_bi_lstm_ave.evaluate(X_test, y_test)
print(f"Test set accuracy: {test_acc}")

Epoch 1/2
Epoch 2/2
Test set accuracy: 0.813319981098175


In [None]:
ml_bi_lstm_mul = Sequential()
ml_bi_lstm_mul.add(Input(shape=(1,), dtype=tf.string))
ml_bi_lstm_mul.add(vectorizer_layer)
ml_bi_lstm_mul.add(embedding_layer)
ml_bi_lstm_mul.add(Bidirectional(LSTM(8, return_sequences=True), merge_mode='mul'))
ml_bi_lstm_mul.add(Bidirectional(LSTM(4)))
ml_bi_lstm_mul.add(Dense(16, activation='elu'))
ml_bi_lstm_mul.add(Dropout(0.5))
ml_bi_lstm_mul.add(Dense(1, activation='sigmoid'))

ml_bi_lstm_mul.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, 256)              0         
 torization)                                                     
                                                                 
 embedding (Embedding)       (None, 256, 128)          2560000   
                                                                 
 bidirectional_8 (Bidirectio  (None, 256, 8)           8768      
 nal)                                                            
                                                                 
 bidirectional_9 (Bidirectio  (None, 8)                416       
 nal)                                                            
                                                                 
 dense_14 (Dense)            (None, 16)                144       
                                                      

In [None]:
opt = Adam(learning_rate=0.001)
ml_bi_lstm_mul.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
ml_bi_lstm_mul.fit(X_train, y_train, epochs=2)

test_loss, test_acc = ml_bi_lstm_mul.evaluate(X_test, y_test)
print(f"Test set accuracy: {test_acc}")

Epoch 1/2
Epoch 2/2
Test set accuracy: 0.8291599750518799
