# Bidirectional LSTM: Application

In [1]:
from nmt_utils import *
import numpy as np
import tensorflow as tf

In [2]:
# Load the example pairs together with the input/output character vocabularies.
# load_dataset is not defined in this notebook; presumably it comes from the
# nmt_utils star import above.
dataset, human_vocab, machine_vocab, inv_machine_vocab = load_dataset()

100%|██████████| 10000/10000 [00:00<00:00, 21067.22it/s]


In [3]:
# Preview the first ten (free-form date string, yyyy-mm-dd date string) pairs.
dataset[:10]

[('9 may 1998', '1998-05-09'),
 ('10.11.19', '2019-11-10'),
 ('9/10/70', '1970-09-10'),
 ('saturday april 28 1990', '1990-04-28'),
 ('thursday january 26 1995', '1995-01-26'),
 ('monday march 7 1983', '1983-03-07'),
 ('sunday may 22 1988', '1988-05-22'),
 ('08 jul 2008', '2008-07-08'),
 ('8 sep 1999', '1999-09-08'),
 ('thursday january 1 1981', '1981-01-01')]

In [4]:
# Input-side map from character to integer index; it also carries '<unk>' and '<pad>' entries.
human_vocab

{' ': 0,
 '.': 1,
 '/': 2,
 '0': 3,
 '1': 4,
 '2': 5,
 '3': 6,
 '4': 7,
 '5': 8,
 '6': 9,
 '7': 10,
 '8': 11,
 '9': 12,
 'a': 13,
 'b': 14,
 'c': 15,
 'd': 16,
 'e': 17,
 'f': 18,
 'g': 19,
 'h': 20,
 'i': 21,
 'j': 22,
 'l': 23,
 'm': 24,
 'n': 25,
 'o': 26,
 'p': 27,
 'r': 28,
 's': 29,
 't': 30,
 'u': 31,
 'v': 32,
 'w': 33,
 'y': 34,
 '<unk>': 35,
 '<pad>': 36}

In [5]:
# Output-side map from character to integer index: '-' plus the digits 0-9.
machine_vocab

{'-': 0,
 '0': 1,
 '1': 2,
 '2': 3,
 '3': 4,
 '4': 5,
 '5': 6,
 '6': 7,
 '7': 8,
 '8': 9,
 '9': 10}

In [6]:
# Inverse of machine_vocab (integer index -> output character); used below to decode predictions.
inv_machine_vocab

{0: '-',
 1: '0',
 2: '1',
 3: '2',
 4: '3',
 5: '4',
 6: '5',
 7: '6',
 8: '7',
 9: '8',
 10: '9'}

In [7]:
# Turn the string pairs into training arrays.
# preprocess_data is not defined in this notebook (presumably from nmt_utils).
# Judging by the printed shapes, the last axis of each array equals the size of
# the matching vocabulary (37 and 11), so these look like one-hot encodings with
# fixed lengths of 30 input and 10 output characters -- TODO confirm in nmt_utils.
X_train, Y_train = preprocess_data(dataset, human_vocab, machine_vocab)

print('Shape of X_train:', X_train.shape)
print('Shape of Y_train:', Y_train.shape)

Shape of X_train: (10000, 30, 37)
Shape of Y_train: (10000, 10, 11)


In [8]:
def Model(params):
    """Build, compile, and train a Conv1D -> bidirectional LSTM -> softmax model.

    The strided convolution shortens the input sequence, the bidirectional
    LSTM returns one vector per remaining time step, and the dense layer emits
    a softmax distribution per time step.

    Args:
        params: dict with the following keys.
            'dims': two-element sequence. dims[0] is the per-example input
                shape given to tf.keras.Input; dims[1] is the per-example
                target shape (dims[1][0] is the target length, dims[1][1] the
                softmax width).
            'X_train', 'Y_train': arrays forwarded to model.fit.
            'epochs', 'batch_size': forwarded to model.fit.
            'learning_rate': learning rate of the Adam optimizer.
            'seed' (optional): when present and not None, seeds NumPy and
                TensorFlow before any layer is created so that a run can be
                repeated. Omitting it keeps the previous unseeded behavior.

    Returns:
        The fitted tf.keras.Model.

    Raises:
        ValueError: if the sequence length produced by the convolution does
            not equal the target length dims[1][0].
    """
    dims = params['dims']
    X_train = params['X_train']
    Y_train = params['Y_train']
    epochs = params['epochs']
    learning_rate = params['learning_rate']
    batch_size = params['batch_size']

    # Optional reproducibility hook; must run before layers create their weights.
    seed = params.get('seed')
    if seed is not None:
        np.random.seed(seed)
        tf.random.set_seed(seed)

    kernel_size = 3
    strides = 3

    # With 'valid' padding the convolution yields (L - kernel) // stride + 1
    # steps. The per-step softmax is compared against the targets step by step,
    # so the two lengths have to agree; fail early with a readable message.
    input_steps, target_steps = dims[0][0], dims[1][0]
    if input_steps is not None and target_steps is not None:
        conv_steps = (input_steps - kernel_size) // strides + 1
        if conv_steps != target_steps:
            raise ValueError(
                'Conv1D(kernel_size={}, strides={}) maps input length {} to {} '
                'steps, but the targets have {} steps'.format(
                    kernel_size, strides, input_steps, conv_steps, target_steps))

    conv = tf.keras.layers.Conv1D(filters=48,
                                  kernel_size=kernel_size,
                                  strides=strides,
                                  padding='valid',
                                  activation='relu',
                                  kernel_initializer='glorot_uniform',
                                  bias_initializer='zeros')
    # NOTE(review): recurrent_initializer='zeros' differs from the Keras default
    # ('orthogonal'). It is kept as-is to preserve training behavior; confirm
    # that it is intentional.
    lstm = tf.keras.layers.LSTM(units=64,
                                activation='tanh',
                                recurrent_activation='sigmoid',
                                kernel_initializer='glorot_uniform',
                                bias_initializer='zeros',
                                recurrent_initializer='zeros',
                                return_sequences=True,
                                return_state=False)
    # 'concat' joins forward and backward outputs, doubling the feature width.
    bilstm = tf.keras.layers.Bidirectional(lstm, merge_mode='concat')
    classifier = tf.keras.layers.Dense(units=dims[1][1],
                                       activation='softmax',
                                       kernel_initializer='glorot_uniform',
                                       bias_initializer='zeros')

    x = tf.keras.Input(shape=dims[0])
    y = classifier(bilstm(conv(x)))
    model = tf.keras.Model(x, y)

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate,
                                         beta_1=0.9,
                                         beta_2=0.999,
                                         epsilon=1e-07)
    model.compile(loss='categorical_crossentropy',
                  metrics=['categorical_accuracy'],
                  optimizer=optimizer)
    model.summary()
    model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size)
    return model

In [9]:
# Train on the whole training set. The per-example shapes handed to the model
# are the array shapes without their leading example-count axis.
model = Model(dict(
    dims=[X_train.shape[1:], Y_train.shape[1:]],
    X_train=X_train,
    Y_train=Y_train,
    epochs=50,
    learning_rate=0.005,
    batch_size=64,
))

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 30, 37)]          0         
_________________________________________________________________
conv1d (Conv1D)              (None, 10, 48)            5376      
_________________________________________________________________
bidirectional (Bidirectional (None, 10, 128)           57856     
_________________________________________________________________
dense (Dense)                (None, 10, 11)            1419      
Total params: 64,651
Trainable params: 64,651
Non-trainable params: 0
_________________________________________________________________
Train on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
E

In [10]:
# Free-form dates to push through the trained model.
examples = ['3 May 1979', 
            '5 April 09', 
            '21th of August 2016', 
            'Tue 10 Jul 2007', 
            'Saturday May 9 2018', 
            'March 3 2001', 
            'March 3rd 2001', 
            '1 November 2011',
            'September 21st 2020',
            '20/06/21']

# Take the sequence length and one-hot depth from the training data instead of
# repeating literals, so the examples are always encoded the way the model was
# trained.
seq_len, vocab_size = X_train.shape[1:]
assert vocab_size == len(human_vocab), 'one-hot depth does not match human_vocab'

# NOTE(review): string_to_int is not defined in this notebook (presumably from
# nmt_utils). It is assumed to return integer indices into human_vocab, at most
# seq_len of them. The examples contain capital letters, which human_vocab does
# not list, so confirm how the helper handles such characters.
X_examples = np.zeros(shape=(len(examples), seq_len, vocab_size), dtype='float32')
for i, example in enumerate(examples):
    sequence = string_to_int(example, seq_len, human_vocab)
    for j, n in enumerate(sequence):
        X_examples[i, j, n] = 1.0

Y_examples = model.predict(X_examples)
# Most probable output character index at every output position.
prediction = np.argmax(Y_examples, axis=-1)

for example, char_ids in zip(examples, prediction):
    date = ''.join(inv_machine_vocab[int(k)] for k in char_ids)
    print("Original Date: {} - Predicted Date: {}".format(example, date))

Original Date: 3 May 1979 - Predicted Date: 1979-05-03
Original Date: 5 April 09 - Predicted Date: 2019-04-05
Original Date: 21th of August 2016 - Predicted Date: 2016-09-10
Original Date: Tue 10 Jul 2007 - Predicted Date: 2007-08-00
Original Date: Saturday May 9 2018 - Predicted Date: 2018-05-09
Original Date: March 3 2001 - Predicted Date: 2001-03-03
Original Date: March 3rd 2001 - Predicted Date: 2001-03-03
Original Date: 1 November 2011 - Predicted Date: 2011-11-01
Original Date: September 21st 2020 - Predicted Date: 2020-09-21
Original Date: 20/06/21 - Predicted Date: 2021-12-20
