In [2]:
import tensorflow as tf

## NOTE
All models below are prone to severe overfitting! To reduce it, you can add `tf.keras.layers.Dropout(0.2)` right after the `Embedding()` layer.

## NOTE2
You can find notebooks with examples of these models at this link:  
https://www.coursera.org/learn/natural-language-processing-tensorflow/supplement/TAAsf/exploring-different-sequence-models

# 1-layer bidirectional LSTM
Here is a valid example of using the hyperparameters consistently (the same `VOCAB_SIZE` for the tokenizer and the `Embedding` layer) in a bidirectional LSTM network.

In [4]:
# Hyperparameters: VOCAB_SIZE feeds both the tokenizer and the Embedding layer,
# EMB_DIM is the size of each embedding vector.
VOCAB_SIZE = 1000
EMB_DIM = 16

t = tf.keras.preprocessing.text.Tokenizer(num_words=VOCAB_SIZE)

# Binary classifier: Embedding -> bidirectional LSTM -> Dense -> sigmoid.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(VOCAB_SIZE, EMB_DIM, input_length=120))
# The Bidirectional wrapper runs the LSTM in both directions.
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)))
model.add(tf.keras.layers.Dense(32, activation='relu'))
# Single sigmoid unit for the final output.
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [22]:
# Print the layer-by-layer output shapes and parameter counts of `model`.
model.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_9 (Embedding)      (None, 120, 16)           16000     
_________________________________________________________________
bidirectional_9 (Bidirection (None, 64)                12544     
_________________________________________________________________
dense_18 (Dense)             (None, 32)                2080      
_________________________________________________________________
dense_19 (Dense)             (None, 1)                 33        
Total params: 30,657
Trainable params: 30,657
Non-trainable params: 0
_________________________________________________________________


Notice that the `Bidirectional` wrapper doubles the output size of the LSTM layer: `LSTM(32)` produces an output of shape `(None, 64)`!

# 2-layer bidirectional LSTM

To stack two LSTM layers, set `return_sequences=True` on every LSTM layer except the last, so that each one passes its full output sequence to the next layer.

Question: How do the layers stack when they have different numbers of LSTM units?

In [23]:
# Two stacked bidirectional LSTM layers followed by a small dense head.
model2 = tf.keras.Sequential()
model2.add(tf.keras.layers.Embedding(VOCAB_SIZE, EMB_DIM, input_length=120))
# return_sequences=True makes the first LSTM emit one output per time step,
# which is the 3-D input the second LSTM needs.
model2.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)))
# The last recurrent layer returns only its final output.
model2.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)))
model2.add(tf.keras.layers.Dense(8, activation='relu'))
model2.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [24]:
# Print the layer-by-layer output shapes and parameter counts of `model2`.
model2.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_10 (Embedding)     (None, 120, 16)           16000     
_________________________________________________________________
bidirectional_10 (Bidirectio (None, 120, 128)          41472     
_________________________________________________________________
bidirectional_11 (Bidirectio (None, 64)                41216     
_________________________________________________________________
dense_20 (Dense)             (None, 8)                 520       
_________________________________________________________________
dense_21 (Dense)             (None, 1)                 9         
Total params: 99,217
Trainable params: 99,217
Non-trainable params: 0
_________________________________________________________________


# Conv1D model

In [5]:
# 1-D convolutional classifier over the embedded token sequence.
model3 = tf.keras.Sequential()
model3.add(tf.keras.layers.Embedding(VOCAB_SIZE, EMB_DIM, input_length=120))
# 128 filters, each with a 1-D kernel of size 5 (it slides along the sequence axis only).
model3.add(tf.keras.layers.Conv1D(128, 5, activation='relu'))
# Keep the maximum of every filter across the whole sequence.
model3.add(tf.keras.layers.GlobalMaxPooling1D())
# The hidden dense layer reuses EMB_DIM as its number of units.
model3.add(tf.keras.layers.Dense(EMB_DIM, activation='relu'))
model3.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [6]:
# Summarise the Conv1D model (`model3`) defined in the cell above.
model3.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 120, 64)           64000     
_________________________________________________________________
conv1d (Conv1D)              (None, 116, 128)          41088     
_________________________________________________________________
global_max_pooling1d (Global (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 65        
Total params: 113,409
Trainable params: 113,409
Non-trainable params: 0
_________________________________________________________________


# GRU 

Hint: a GRU is similar to an LSTM, but computationally simpler.  
In some cases it may give you the same results, but faster.  
Sometimes it won't be as good as an LSTM...

In [9]:
# Hyperparameters for the GRU model; note that EMB_DIM is reassigned to 64 here.
VOCAB_SIZE = 1000
EMB_DIM = 64

t = tf.keras.preprocessing.text.Tokenizer(num_words=VOCAB_SIZE)

# Bidirectional GRU classifier. EMB_DIM is reused as the number of GRU units
# and as the width of the hidden dense layer.
model4 = tf.keras.Sequential()
# No input_length is given, so the sequence length is left unspecified.
model4.add(tf.keras.layers.Embedding(VOCAB_SIZE, EMB_DIM))
model4.add(tf.keras.layers.Bidirectional(tf.keras.layers.GRU(EMB_DIM)))
model4.add(tf.keras.layers.Dense(EMB_DIM, activation='relu'))
model4.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [10]:
# Print the layer-by-layer output shapes and parameter counts of `model4`.
model4.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, None, 64)          64000     
_________________________________________________________________
bidirectional_2 (Bidirection (None, 128)               49920     
_________________________________________________________________
dense_6 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 65        
Total params: 122,241
Trainable params: 122,241
Non-trainable params: 0
_________________________________________________________________


# Conv1D + LSTM + Dropout Regularization

In [5]:
# Convolution + LSTM classifier with dropout applied to the embeddings.
model5 = tf.keras.Sequential()
model5.add(tf.keras.layers.Embedding(VOCAB_SIZE, EMB_DIM, input_length=120))
# Dropout directly after the embedding, with rate 0.2.
model5.add(tf.keras.layers.Dropout(0.2))
# 64 filters with a 1-D kernel of size 5.
model5.add(tf.keras.layers.Conv1D(64, 5, activation='relu'))
# Downsample the sequence axis by a factor of 4 before the recurrent layer.
model5.add(tf.keras.layers.MaxPooling1D(pool_size=4))
model5.add(tf.keras.layers.LSTM(64))
model5.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model5.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model5.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 120, 16)           16000     
_________________________________________________________________
dropout (Dropout)            (None, 120, 16)           0         
_________________________________________________________________
conv1d (Conv1D)              (None, 116, 64)           5184      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 29, 64)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 65        
Total params: 54,273
Trainable params: 54,273
Non-trainable params: 0
__________________________________________________

# 2-layer LSTM with Dropout and Dense layers with L2 regularization

In [None]:
# Multi-class model: a softmax over `total_words` classes trained with
# categorical cross-entropy.
# NOTE(review): `total_words` and `max_sequence_len` are not defined in the
# visible part of this notebook; they must be set before running this cell.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(total_words, 100, input_length=max_sequence_len - 1),
    # return_sequences=True so the following LSTM receives the full sequence.
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(150, return_sequences=True)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.LSTM(100),
    # Integer division: the number of Dense units must be an int, not a float.
    tf.keras.layers.Dense(
        total_words // 2,
        activation='relu',
        kernel_regularizer=tf.keras.regularizers.l2(0.01),
    ),
    tf.keras.layers.Dense(total_words, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()