In [198]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# English - Hindi Translation Chatbot


In [0]:
import os
import gc
#os.chdir('E:/Anupam/HomeDocs/DataScience/ExternalAssignments/NLP/Language Translation/')
import warnings
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [200]:
#Change the working directory
os.chdir('/content/gdrive/My Drive/Language Translation')
os.listdir()


['Dataset',
 'seq2seq_encoder_eng_hin.hd5',
 'seq2seq_decoder_eng_hin.hd5',
 'encoder_tokenizer_eng',
 'decoder_tokenizer_hin']

In [0]:
import pandas as pd
import numpy as np
import math
import nltk
import re 
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string
import tensorflow as tf
from tensorflow.keras import backend as backend
import codecs
tf.set_random_seed(42)

In [202]:
with codecs.open('Dataset/Engligh-Hindi Translation Dataset.txt', encoding='utf-8') as f:
    texual_data = f.read()
texual_data[0:200]

"\ufeffWow!\tवाह!\r\nHelp!\tबचाओ!\r\nJump.\tउछलो.\r\nJump.\tकूदो.\r\nJump.\tछलांग.\r\nHello!\tनमस्ते।\r\nHello!\tनमस्कार।\r\nCheers!\tवाह-वाह!\r\nCheers!\tचियर्स!\r\nGot it?\tसमझे कि नहीं?\r\nI'm OK.\tमैं ठीक हूँ।\r\nAwesome!\tबहुत बढ़िया!\r"

### Spell Correction, Removing punctuation, Preparing word to vector dictionary

In [203]:
#Split by newline character
texual_data =  texual_data.split('\n')
#Show some Data
texual_data[100:105]

["I don't know.\tमुझे नहीं मालूम।\r",
 'I have a car.\tमेरे पास एक गाड़ी है।\r',
 'I have a dog.\tमेरे पास एक कुत्ता है।\r',
 'I understand.\tमैं समझता हूँ।\r',
 "I'm a doctor.\tमैं डॉक्टर हूँ।\r"]

In [204]:
len(texual_data)

2868

### Separate Source and Target pairs

In [0]:
encoder_text = [] #Initialize Source language list
decoder_text = [] #Initialize Target language list

#Iterate over data
for line in texual_data:
    try:
        english_text, hindi_text = line.split('\t')
        encoder_text.append(english_text)
        
        # Add tab '<start>' as 'start sequence in target
        # And '<end>' as End
        hindi_text = hindi_text.replace('\r', '')
        decoder_text.append('<start> ' + hindi_text + ' <end>')
    except:
        pass #ignore data which goes into error  

### Separate Source and Target pairs

In [206]:
display(encoder_text[100:105])
decoder_text[100:105]

["I don't know.",
 'I have a car.',
 'I have a dog.',
 'I understand.',
 "I'm a doctor."]

['<start> मुझे नहीं मालूम। <end>',
 '<start> मेरे पास एक गाड़ी है। <end>',
 '<start> मेरे पास एक कुत्ता है। <end>',
 '<start> मैं समझता हूँ। <end>',
 '<start> मैं डॉक्टर हूँ। <end>']

### Tokenize Source language sentences

In [207]:
#Tokenizer for source language
encoder_t = tf.keras.preprocessing.text.Tokenizer()
encoder_t.fit_on_texts(encoder_text) #Fit it on Source sentences
encoder_seq = encoder_t.texts_to_sequences(encoder_text) #Convert sentences to numbers 
encoder_seq[100:105] #Display some converted sentences

[[2, 28, 43], [2, 12, 6, 100], [2, 12, 6, 130], [2, 213], [38, 6, 153]]

In [208]:
encoder_t.word_index

{'the': 1,
 'i': 2,
 'to': 3,
 'you': 4,
 'is': 5,
 'a': 6,
 'he': 7,
 'of': 8,
 'in': 9,
 'my': 10,
 'it': 11,
 'have': 12,
 'this': 13,
 'me': 14,
 'she': 15,
 'for': 16,
 'was': 17,
 'are': 18,
 'do': 19,
 'that': 20,
 'his': 21,
 'your': 22,
 'we': 23,
 'will': 24,
 'what': 25,
 'on': 26,
 'him': 27,
 "don't": 28,
 'at': 29,
 'not': 30,
 'her': 31,
 'like': 32,
 'go': 33,
 'with': 34,
 'be': 35,
 'how': 36,
 'and': 37,
 "i'm": 38,
 'can': 39,
 'time': 40,
 'there': 41,
 'has': 42,
 'know': 43,
 'all': 44,
 'up': 45,
 'they': 46,
 'come': 47,
 'as': 48,
 'very': 49,
 'had': 50,
 'from': 51,
 "it's": 52,
 'please': 53,
 'did': 54,
 'when': 55,
 'by': 56,
 'want': 57,
 'am': 58,
 'here': 59,
 'out': 60,
 'an': 61,
 'going': 62,
 'been': 63,
 'get': 64,
 'take': 65,
 'about': 66,
 'no': 67,
 'father': 68,
 "can't": 69,
 'book': 70,
 'if': 71,
 'india': 72,
 'were': 73,
 'one': 74,
 'tom': 75,
 'money': 76,
 'english': 77,
 'two': 78,
 'tomorrow': 79,
 'would': 80,
 "i'll": 81,
 'day': 

### Tokenize Target language sentences

In [209]:
#Tokenizer for target language, filters should not <start> and <end>
#remove < and > used in Target language sequences
decoder_t = tf.keras.preprocessing.text.Tokenizer(filters='!"#$%&()*+,-./:;=?@[\\]^_`{|}~\t\n')
decoder_t.fit_on_texts(decoder_text) #Fit it on target sentences
decoder_seq = decoder_t.texts_to_sequences(decoder_text) #Convert sentences to numbers 
decoder_seq[100:105]

[[1, 12, 5, 1499, 2],
 [1, 28, 40, 21, 106, 3, 2],
 [1, 28, 40, 21, 208, 3, 2],
 [1, 6, 779, 19, 2],
 [1, 6, 186, 19, 2]]

In [210]:
#Maximum length of sentence
max_encoder_seq_length = max([len(txt) for txt in encoder_seq])
max_decoder_seq_length = max([len(txt) for txt in decoder_seq])
print('Maximum sentence length for Encoder language: ', max_encoder_seq_length)
print('Maximum sentence length for Decoder language: ', max_decoder_seq_length)

#Target language Vocablury
encoder_vocab_size = len(encoder_t.word_index)
decoder_vocab_size = len(decoder_t.word_index)
print('Source language vocablury size: ', encoder_vocab_size)
print('Target language vocablury size: ', decoder_vocab_size)

Maximum sentence length for Encoder language:  22
Maximum sentence length for Decoder language:  27
Source language vocablury size:  2404
Target language vocablury size:  3009


In [211]:
#Make the encoder sequences of equal length and decoder sequence of equal length. In the decoder sequence, we are going to 
#pad the sequence in the beginning as we want the text to be at the end so that LSTM remembers it better
#Encoder Sentences
encoder_input_data = tf.keras.preprocessing.sequence.pad_sequences(encoder_seq, maxlen=max_encoder_seq_length, padding='pre')
#Decoder Sentences
decoder_input_data = tf.keras.preprocessing.sequence.pad_sequences(decoder_seq, maxlen=max_decoder_seq_length, padding='post')
print('Source data shape: ', encoder_input_data.shape)
print('Target data shape: ', decoder_input_data.shape)

Source data shape:  (2867, 22)
Target data shape:  (2867, 27)


In [212]:
print(encoder_input_data[0])
decoder_input_data[0]

[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0 1280]


array([  1, 767,   2,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0], dtype=int32)

### Integer to Word converter for Decoder data
This will be used when we want to convert the output of decoder into text

In [213]:
#Pls note that the index starts from 1. Thus there will not be any text at 0 position
int_to_word_decoder = dict((i,text) for text, i in decoder_t.word_index.items())
int_to_word_decoder[1]

'<start>'

#### Building Decoder Output

In [214]:
#Initialize array
decoder_target_data = np.zeros((decoder_input_data.shape[0], decoder_input_data.shape[1]))

#Shift Target output by one word
for i in range(decoder_input_data.shape[0]):
    for j in range(1,decoder_input_data.shape[1]):
        decoder_target_data[i][j-1] = decoder_input_data[i][j]
decoder_target_data[0]

array([767.,   2.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.])

### Convert target data in one hot vector

In [0]:
#Initialize one hot encoding array
decoder_target_one_hot = np.zeros((decoder_input_data.shape[0], #number of sentences
                                   decoder_input_data.shape[1], #Number of words in each sentence
                                   len(decoder_t.word_index)+1)) #Vocab size + 1

In [216]:
#Build one hot encoded array
for i in range(decoder_target_data.shape[0]):
    for j in range(decoder_target_data.shape[1]):
        decoder_target_one_hot[i][j] = tf.keras.utils.to_categorical(decoder_target_data[i][j],
                                                                     num_classes=len(decoder_t.word_index)+1) 
decoder_target_one_hot.shape

(2867, 27, 3010)

### Building the Training Model

In [0]:
#Define config parameters
encoder_embedding_size = 256
decoder_embedding_size = 256
rnn_units = 256
training_epochs = 25
batch_size = 64

#### Build Encoder

In [0]:
backend.clear_session()
tf.reset_default_graph()

In [0]:
#Input Layer
encoder_inputs = tf.keras.layers.Input(shape=(None,))

#Embedding layer
encoder_embedding = tf.keras.layers.Embedding(encoder_vocab_size+1, encoder_embedding_size)

#Get embedding layer output by feeding inputs
encoder_embedding_output = encoder_embedding(encoder_inputs)

#LSTM Layer and its output
x, state_h, state_c = tf.keras.layers.LSTM(rnn_units,return_state=True)(encoder_embedding_output)

#Build a list to feed Decoder
encoder_states = [state_h, state_c]

In [220]:
state_c

<tf.Tensor 'lstm/while/Exit_3:0' shape=(?, 256) dtype=float32>

#### Build Decoder

In [0]:
#Decode input - padded Target sentences
decoder_inputs = tf.keras.layers.Input(shape=(None,))

#Decoder Embedding layer
decoder_embedding = tf.keras.layers.Embedding(decoder_vocab_size + 1, decoder_embedding_size)

#Embedding layer output
decoder_embedding_output = decoder_embedding(decoder_inputs)

#Decoder RNN
decoder_rnn = tf.keras.layers.LSTM(rnn_units, return_sequences=True, return_state=True)

#Decoder RNN Output, State initialization from Encoder states
#Output will be all hidden sequences, last 'h' state and last 'c' state
x,_,_ = decoder_rnn(decoder_embedding_output, initial_state=encoder_states)

#Output Layer
decoder_dense = tf.keras.layers.Dense(decoder_vocab_size + 1, activation='softmax')

#Output of Dense layer
decoder_outputs = decoder_dense(x)

In [222]:
decoder_outputs

<tf.Tensor 'dense/truediv:0' shape=(?, ?, 3010) dtype=float32>

#### Build Model using both Encoder and Decoder

In [223]:
model = tf.keras.models.Model([encoder_inputs, decoder_inputs], #2 Inputs to the model
                              decoder_outputs) #Output of the model
model.output

<tf.Tensor 'dense/truediv:0' shape=(?, ?, 3010) dtype=float32>

In [0]:
model.compile(optimizer='adam', loss='categorical_crossentropy')

#### Train the model

In [225]:
model.fit([encoder_input_data, decoder_input_data], decoder_target_one_hot, batch_size = batch_size, verbose = 1, \
          epochs = training_epochs, validation_split = 0.2)

Train on 2293 samples, validate on 574 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<tensorflow.python.keras.callbacks.History at 0x7f0a2413af98>

### Building Model for Prediction

In [0]:
encoder_model = tf.keras.models.Model(encoder_inputs, #Padded input sequences
                                      encoder_states) #Hidden state and Cell state at last time step

### Build the Decoder Model
Define Input for both 'h' state and 'c' state initialization
Get Decoder RNN outputs along with h and c state
Get Decoder Dense layer output
Build Model

#### Step 1 - Define Input for both 'h' state and 'c' state initialization

In [0]:

#Hidden state input
decoder_state_input_h = tf.keras.layers.Input(shape=(rnn_units,))

#Cell state input
decoder_state_input_c = tf.keras.layers.Input(shape=(rnn_units,))

#Putting it together
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

#### Step 2 - Get Decoder RNN outputs along with h and c state

In [0]:
#Get Embedding layer output
x = decoder_embedding(decoder_inputs)

#We will use the layer which we trained earlier
rnn_outputs, state_h, state_c = decoder_rnn(x, initial_state=decoder_states_inputs)

#Why do we need this?
decoder_states = [state_h, state_c]

#### Step 3 - Get Decoder Dense layer output

In [0]:
decoder_outputs = decoder_dense(rnn_outputs)

#### Step 4 - Build Decoder Model

In [0]:
decoder_model = tf.keras.models.Model([decoder_inputs] + decoder_states_inputs,  #Model inputs
                                      [decoder_outputs] + decoder_states)

#### Predicting output from Seq2Seq model

In [231]:
decoder_t.word_index['<start>']
int_to_word_decoder[1]

'<start>'

In [0]:
def decode_sentence(input_sequence):
    
    #Get the encoder state values - Sentence embedding
    decoder_initial_states_value = encoder_model.predict(input_seq)
    
    #Build a sequence with '<start>' - starting sequence for Decoder
    target_seq = np.zeros((1,1))    
    target_seq[0][0] = decoder_t.word_index['<start>']
    
    #flag to check if prediction should be stopped
    stop_loop = False
    
    #Initialize predicted sentence
    predicted_sentence = ''
    
    num_of_predictions = 0
    
    #start the loop
    while not stop_loop:
        
        predicted_outputs, h, c = decoder_model.predict([target_seq] + 
                                                        decoder_initial_states_value)
        
        #Get the predicted word index with highest probability
        predicted_output = np.argmax(predicted_outputs[0,-1,:])
        
        #Get the predicted word from predicter index
        predicted_word = int_to_word_decoder[predicted_output]
        
        #Check if prediction should stop
        if(predicted_word == '<end>' or num_of_predictions > max_decoder_seq_length):
            
            stop_loop = True
            continue
        
        num_of_predictions += 1
        
        #Updated predicted sentence
        if (len(predicted_sentence) == 0):
            predicted_sentence = predicted_word
        else:
            predicted_sentence = predicted_sentence + ' ' + predicted_word
            
        #Update target_seq to be the predicted word index
        target_seq[0][0] = predicted_output
        
        #Update initial states value for decoder
        decoder_initial_states_value = [h,c]
        
    
    return predicted_sentence

#### Call Prediction function on a random sentence

In [233]:
#Generate a random number
start_num = np.random.randint(0, high=len(encoder_text) - 10)

#Predict model output for 5 sentences
for i in range(start_num, start_num + 5):
    input_seq = encoder_input_data[i : i+1]
    predicted_sentence = decode_sentence(input_seq)
    print('--------')
    print ('Input sentence: ', encoder_text[i])
    print ('Predicted sentence: ', predicted_sentence )

--------
Input sentence:  Won't you come to dine with us?
Predicted sentence:  तुम तुम्हें साथ नहीं नहीं गया।
--------
Input sentence:  You may take anything you like.
Predicted sentence:  तुम तुम्हें नहीं नहीं हो क्या
--------
Input sentence:  You must be back by 10 o'clock.
Predicted sentence:  तुम एक देर के लिए एक मदद करना होगा।
--------
Input sentence:  You owe me an apology for that.
Predicted sentence:  मैं कल से मदद से चाहता हूँ।
--------
Input sentence:  You should have come yesterday.
Predicted sentence:  तुम तुम क्या कर सकते हो


#### Save encoder and decoder model

In [0]:
#Compile models to avoid error
encoder_model.compile(optimizer='adam',loss='categorical_crossentropy')
decoder_model.compile(optimizer='adam',loss='categorical_crossentropy')

#Save the models
encoder_model.save('seq2seq_encoder_eng_hin.hd5')  #Encoder model
decoder_model.save('seq2seq_decoder_eng_hin.hd5')  #Decoder model

#### Save encoder and decoder tokenizers

In [0]:
import pickle

pickle.dump(encoder_t,open('encoder_tokenizer_eng','wb'))
pickle.dump(decoder_t,open('decoder_tokenizer_hin','wb'))

# Model with Attention

#### Build Encoder

In [0]:
backend.clear_session()
tf.reset_default_graph()

In [0]:
#Input Layer
encoder_inputs = tf.keras.layers.Input(shape=(None,))

#Embedding layer
encoder_embedding = tf.keras.layers.Embedding(encoder_vocab_size+1, encoder_embedding_size)

#Get embedding layer output by feeding inputs
encoder_embedding_output = encoder_embedding(encoder_inputs)

#### Build Encoder - Get all hidden states

In [0]:
#Create LSTM Layer and get All hidden states, last hidden and cell state
encoder_lstm = tf.keras.layers.LSTM(rnn_units,return_state=True, return_sequences=True)

#Get 3 outputs of LSTM Layer
encoder_all_h_states, state_h, state_c = encoder_lstm(encoder_embedding_output)

#Build a list to feed Decoder
encoder_states = [state_h, state_c]

#### Build Decoder

In [0]:
#Decode input - padded Target sentences
decoder_inputs = tf.keras.layers.Input(shape=(None,))

#Decoder Embedding layer
decoder_embedding = tf.keras.layers.Embedding(decoder_vocab_size + 1, decoder_embedding_size)

#Embedding layer output
decoder_embedding_output = decoder_embedding(decoder_inputs)

#Decoder RNN
decoder_rnn = tf.keras.layers.LSTM(rnn_units, return_sequences=True, return_state=True)

#Decoder RNN Output, State initialization from Encoder states
#Output will be all hidden sequences, last 'h' state and last 'c' state
decoder_all_h_states,_,_ = decoder_rnn(decoder_embedding_output, 
                                       initial_state=encoder_states)

#### Build Decoder...Alignment Matrix

In [0]:
#1. Dot Product between Decoder_all_h_states and encoder_all_h_states
#2. Apply softmax to get Alignment matrix

#Dimensions details
#decoder_all_states = batch_size x max_decoder_length x rnn_units
#encoder_all_states = batch_size x max_encoder_length x rnn_units
#score = batch_size x max_decoder_length x max_encoder_length
#alignment matrix = batch_size x max_decoder_length x max_encoder_length
#axes = 2 implies it will transpose and then multiply

score = tf.keras.layers.dot([decoder_all_h_states, encoder_all_h_states], axes=2)
#Now apply softmax on Score to convert each number into percentage or fraction
alignment_matrix = tf.keras.layers.Activation('softmax')(score)

#Try general and concat approaches to alignment matrix

#### Build Decoder...Context Vector

In [0]:
#Weighted sum of multiplication of Alignment matrix and encoder states
#Dimension of context_vector =  batch_size x max_decoder_length x rnn_units
#axes = [2,1] implies first, take the transpose of alignment_matrix and then apply dot product with encoder_all_h_states
context_vector = tf.keras.layers.dot([alignment_matrix, encoder_all_h_states], axes=[2,1])

#### Build Decoder...Attention Vector

In [0]:
#Concatenate context vector and decoder_all_h_states
#context_decoder_hidden = batch_size x max_decoder_length x rnn_units
#attention_vector = batch_size x max_decoder_length x rnn_units(whatever was assigned in earliercell)

context_decoder_hidden = tf.keras.layers.concatenate([context_vector, 
                                                      decoder_all_h_states])

attention_dense_layer = tf.keras.layers.Dense(rnn_units, use_bias=False, 
                                              activation='relu')

attention_vector = attention_dense_layer(context_decoder_hidden)

#### Build Decoder...Output layer

In [0]:
#Output layer. Now we build the dense layer with the probabilities equal to no of words. Each worf will be output as probability
decoder_dense = tf.keras.layers.Dense(decoder_vocab_size + 1, activation='softmax')

#With attention input will be attention_vector and not decoder_all_h_states
decoder_outputs = decoder_dense(attention_vector)

### Build Model using both Encoder and Decoder

In [0]:
model = tf.keras.models.Model([encoder_inputs, decoder_inputs], #2 Inputs to the model
                              decoder_outputs) #Output of the model
model.compile(optimizer='adam', loss='categorical_crossentropy')

### Train the model

In [245]:
model.fit([encoder_input_data, decoder_input_data], decoder_target_one_hot, batch_size = batch_size, verbose = 1, \
          epochs = training_epochs, validation_split = 0.2)

Train on 2293 samples, validate on 574 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<tensorflow.python.keras.callbacks.History at 0x7f0a1e6494a8>

## Building Model for Prediction

#### Build the Encoder Model to predict Encoder States

In [0]:
encoder_model = tf.keras.models.Model(inputs=encoder_inputs, #Padded input sequences
                                      outputs=[encoder_all_h_states] + #Hidden states at all time steps
                                      encoder_states) #Hidden state and Cell state at last time step

### Build the Decoder Model
Define Input for both 'h' state and 'c' state initialization
Define Input for all encoder states - Attention Layer
Get Decoder RNN outputs along with h and c state
Build Attention Layer
Get Decoder Dense layer output using Attention vector
Build Model

#### Step 1 - Define Input for both 'h' state and 'c' state initialization

In [0]:
#Hidden state input
decoder_state_input_h = tf.keras.layers.Input(shape=(rnn_units,))

#Cell state input
decoder_state_input_c = tf.keras.layers.Input(shape=(rnn_units,))

#Putting it together
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

#### Step 2 - Define Input encoder states - Attention Layer

In [0]:
encoder_outputs = tf.keras.layers.Input(shape=(max_encoder_seq_length, rnn_units,))

#### Step 3 - Get Decoder RNN outputs along with h and c state

In [0]:
#Get Embedding layer output
x = decoder_embedding(decoder_inputs)

#We will use the layer which we trained earlier
rnn_outputs, state_h, state_c = decoder_rnn(x, initial_state=decoder_states_inputs)

#Why do we need this?
decoder_states = [state_h, state_c]

#### Step 4 - Build Attention Layer

In [250]:
#Alignment score
p_score = tf.keras.layers.dot([rnn_outputs, encoder_outputs], axes=2)

#Perform softmax to get Alignment matrix
p_alignment_matrix = tf.keras.layers.Activation('softmax')(p_score)

#Context Vector
p_context_vector = tf.keras.layers.dot([p_alignment_matrix, encoder_outputs], axes=[2,1])

#Build Attention Vector
# 1. Caoncatenate both context vector and decoder outputs
# 2. Feed it to the Dense layer 
p_context_decoder_hidden = tf.keras.layers.concatenate([p_context_vector, rnn_outputs])
p_attention_vector = attention_dense_layer(p_context_decoder_hidden)

p_alignment_matrix

<tf.Tensor 'activation_1/truediv:0' shape=(?, ?, 22) dtype=float32>

#### Step 5 - Get Decoder Dense layer output

In [0]:
decoder_outputs = decoder_dense(p_attention_vector)

#### Step 6 - Build Decoder Model

In [0]:
#3 Inputs - Word, h/c state and all hidden states from encoder
#3 Outputs - predicted word, h and c state values for next run and alignment matrix for visualization

decoder_model = tf.keras.models.Model([decoder_inputs] +  #Start sequence and then word
                                      decoder_states_inputs + #h and c state value for initialization
                                      [encoder_outputs],  #Encoder all hidden states for Attention layer
                                      [decoder_outputs] + #Model word prediction
                                      decoder_states +   #h and c states for next run
                                      [p_alignment_matrix]) #for Alignment matrix

### Predicting output from Seq2Seq model

In [0]:
def decode_sentence(input_sequence):
    
    #Get the encoder state values
    encoder_output =  encoder_model.predict(input_sequence)
    decoder_initial_states_value = encoder_output[1:]    
    encoded_seqs = encoder_output[0]
    
    #Build a sequence with '<start>' - starting sequence for Decoder
    target_seq = np.zeros((1,1))    
    target_seq[0][0] = decoder_t.word_index['<start>']
    
    #flag to check if prediction should be stopped
    stop_loop = False
    
    #Initialize predicted sentence
    predicted_sentence = ''
    
    #start the loop
    while not stop_loop:
        
        #Decoder model with 3 inputs
        predicted_outputs, h, c, a_matrix = decoder_model.predict([target_seq] + 
                                                                  decoder_initial_states_value +
                                                                  [encoded_seqs])
        
        #Get the predicted word index with highest probability
        predicted_output = np.argmax(predicted_outputs[0,-1,:])
        
        #Get the predicted word from predicter index
        if (predicted_output == 0):
            predicted_word = ' '
        else:
            predicted_word = int_to_word_decoder[predicted_output]
        
        #Check if prediction should stop
        if(predicted_word == '<end>' or len(predicted_sentence) > max_decoder_seq_length):
            
            stop_loop = True
            continue
                    
        #Updated predicted sentence
        if (len(predicted_sentence) == 0):
            predicted_sentence = predicted_word
        else:
            predicted_sentence = predicted_sentence + ' ' + predicted_word
            
        #Update target_seq to be the predicted word index
        target_seq[0][0] = predicted_output
        
        #Update initial states value for decoder
        decoder_initial_states_value = [h,c]
        
        print(a_matrix)
    
    return predicted_sentence

#### Call Prediction function on a random sentence

#### Compile encoder and decoder model

In [0]:
#Compile models to avoid error
encoder_model.compile(optimizer='adam',loss='categorical_crossentropy')
decoder_model.compile(optimizer='adam',loss='categorical_crossentropy')

In [258]:
#Generate a random number
start_num = np.random.randint(0, high=len(encoder_text) - 10)

#Predict model output for 5 sentences
for i in range(start_num, start_num + 5):
    input_seq = encoder_input_data[i : i+1]
    #print(input_seq)
    predicted_sentence = decode_sentence(input_seq)
    print('--------')
    print ('Input sentence: ', encoder_text[i])
    print ('Predicted sentence: ', predicted_sentence )

[[[4.3143937e-38 1.8504775e-33 4.2454395e-27 5.9206155e-21 1.3727374e-17
   1.6162657e-16 3.3117601e-16 4.0786500e-16 4.3975568e-16 4.5414100e-16
   4.6065786e-16 4.6332258e-16 4.6402831e-16 4.6373984e-16 4.6298334e-16
   4.6205351e-16 4.6110266e-16 9.9999344e-01 6.5051536e-06 5.8702924e-11
   2.2036130e-13 5.4275768e-12]]]
[[[3.7825784e-22 5.2033807e-19 9.2005252e-15 8.2589165e-11 7.1980821e-09
   2.1035387e-08 2.7283047e-08 2.9322514e-08 3.0441896e-08 3.1225920e-08
   3.1785969e-08 3.2182129e-08 3.2458551e-08 3.2649421e-08 3.2780086e-08
   3.2869611e-08 3.2930860e-08 9.9187684e-01 8.0915336e-03 3.0116218e-05
   6.3913768e-07 5.6867117e-07]]]
[[[2.4118321e-15 1.4722118e-13 4.8515698e-11 1.7817207e-08 5.3245975e-07
   1.5126996e-06 1.9275451e-06 2.0249222e-06 2.0572220e-06 2.0796742e-06
   2.0996426e-06 2.1176002e-06 2.1332603e-06 2.1464925e-06 2.1574228e-06
   2.1663211e-06 2.1734936e-06 9.9822813e-01 1.7105297e-03 2.6894371e-05
   7.8566922e-07 8.5779902e-06]]]
[[[1.0202640e-20 1.448