<a href="https://colab.research.google.com/github/dustejuned/nlp/blob/master/lstms_with_attention.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import tensorflow as tf
from tensorflow import keras
from keras_preprocessing import sequence
import os

In [0]:
class Attention(tf.keras.Model):
    def __init__(self, units):
        super(Attention, self).__init__()
        self.Layer1 = tf.keras.layers.Dense(units)
        self.Layer2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, features, hidden):
        hidden_time_axis = tf.expand_dims(hidden, 1)
        score = tf.nn.tanh(self.Layer1(features) + self.Layer2(hidden_time_axis))
        attention_weights = tf.nn.softmax(self.V(score), axis = 1)
        context_vector = attention_weights * features
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector

In [0]:
def BuildModel(vocab_size=10000, max_input_len=200, rnn_cell_size=128, dropout=0.2):
    #Randomly initialize embedding vector
    input_sequence = keras.Input(shape=(max_input_len,), dtype='int32')
    embedded_sequence = keras.layers.Embedding(vocab_size, 128, input_length=max_input_len)(input_sequence)
    #create first bidirectional lstm layer
    lstm = keras.layers.Bidirectional(keras.layers.LSTM(rnn_cell_size, dropout=dropout, return_sequences=True, return_state=True, recurrent_activation='relu', recurrent_initializer='glorot_uniform'), name='bidirectional_lstm_0')(embedded_sequence)
    lstm, forward_h, forward_c, backward_h, backward_c = keras.layers.Bidirectional(keras.layers.LSTM(rnn_cell_size, dropout=dropout, return_sequences=True, return_state=True, recurrent_activation='relu', recurrent_initializer='glorot_uniform'), name='bidirectional_lstm_1')(lstm)

    #concatenate the input of both hidden state before calulating attention
    state_h = keras.layers.Concatenate()([forward_h, backward_h])
    #state_c = keras.layers.Concatenate()([forward_c, backward_c])

    attention = Attention(128)
    context_vector = attention.call(lstm, state_h)

    output = keras.layers.Dense(1, activation='sigmoid')(context_vector)

    model = keras.Model(inputs=input_sequence, outputs=output)
    
    return model

In [0]:
def TrainModel(model, x_train, y_train):
    model.compile(optimizer= tf.train.AdamOptimizer(), loss= 'binary_crossentropy', metrics=['accuracy'])
    early_stopping_cb = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='auto')
    history = model.fit(x_train, y_train, epochs=10, batch_size=200, validation_split=.3, verbose = 1, callbacks=[early_stopping_cb])

    return model, history

In [0]:
def TestModel(model, x_test, y_test):
    result = model.evaluate(x_test, y_test)
    return result

In [12]:
vocab_size = 10000
max_len = 200
pad_id = 0
start_id = 1
unknown_id = 2
index_offset = 2

(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=vocab_size, start_char=start_id, oov_char=unknown_id, index_from=index_offset)

word2idx = keras.datasets.imdb.get_word_index()

idx2word = {i + index_offset: w for w, i in word2idx.items()}
idx2word[pad_id] = '<PAD>'
idx2word[start_id] = '<START>'
idx2word[unknown_id] = '<UNKNOWN>'

x_train = sequence.pad_sequences(x_train, maxlen=max_len, truncating='post', padding='post', value=pad_id)
x_test = sequence.pad_sequences(x_test, maxlen=max_len, truncating='post', padding='post', value=pad_id)

model = BuildModel()
print(model.summary())
model, history = TrainModel(model, x_train, y_train)
print(history)
result = TestModel(model, x_test, y_test)
print(result)

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 200)]        0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 200, 128)     1280000     input_3[0][0]                    
__________________________________________________________________________________________________
bidirectional_lstm_0 (Bidirecti [(None, 200, 256), ( 263168      embedding_2[0][0]                
__________________________________________________________________________________________________
bidirectional_lstm_1 (Bidirecti [(None, 200, 256), ( 394240      bidirectional_lstm_0[0][0]       
                                                                 bidirectional_lstm_0[0][1] 

In [14]:
history.history

{'acc': [0.5141714,
  0.50285715,
  0.50285715,
  0.50285715,
  0.50285715,
  0.50285715,
  0.50285715,
  0.50285715,
  0.50285715,
  0.50285715],
 'loss': [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
 'val_acc': [0.49333334,
  0.49333334,
  0.49333334,
  0.49333334,
  0.49333334,
  0.49333334,
  0.49333334,
  0.49333334,
  0.49333334,
  0.49333334],
 'val_loss': [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]}