### Load tensorflow

In [1]:
import tensorflow as tf

### Load Models

In [2]:
#Connect to Google Drive (where the models and tokenizers are currently saved)
from google.colab import drive
#Mount the Drive at /gdrive so the saved files can be read by path
drive.mount('/gdrive')

Mounted at /gdrive


In [3]:
#Location of models - base folder on the mounted Drive.
#The trailing '/' matters: file names are appended to this string by plain concatenation.
model_location = '/gdrive/My Drive/Great Learning/Sequential NLP/Notebooks/4. Seq2Seq Model/'

In [4]:
#Encoder model - its predictions are used as the initial decoder states at inference
encoder_model = tf.keras.models.load_model(model_location + 'models/seq2seq_encoder_eng_hin.hd5')

#Decoder model - called one step at a time with the previous token and the current states
decoder_model = tf.keras.models.load_model(model_location + 'models/seq2seq_decoder_eng_hin.hd5')



In [5]:
#Print the encoder's layers, output shapes and parameter counts to confirm it loaded correctly
encoder_model.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None)]            0         
_________________________________________________________________
embedding_1 (Embedding)      (None, None, 50)          120150    
_________________________________________________________________
lstm_1 (LSTM)                [(None, 256), (None, 256) 314368    
Total params: 434,518
Trainable params: 434,518
Non-trainable params: 0
_________________________________________________________________


### Load tokenizers

In [6]:
import pickle

#Load the fitted tokenizers saved during training.
#NOTE: pickle.load can execute arbitrary code while unpickling - only load
#tokenizer files that you created yourself or otherwise trust.
#The 'with' blocks make sure each file handle is closed after loading.
with open(model_location + 'models/encoder_tokenizer_eng', 'rb') as encoder_file:
    encoder_t = pickle.load(encoder_file)

with open(model_location + 'models/decoder_tokenizer_hin', 'rb') as decoder_file:
    decoder_t = pickle.load(decoder_file)

#### Define Configuration parameters

In [7]:
#Sequence lengths must match the values used when the models were trained.
#max_encoder_seq_length is the padding length applied to every input sentence;
#max_decoder_seq_length is used in the stopping condition of the prediction loop.
max_encoder_seq_length = 22 #From the training
max_decoder_seq_length = 27 #From the training

In [8]:
#Reverse lookup for the decoder vocabulary: key is the word index, value is the actual word.
#Used during prediction to turn a predicted integer back into a word.
int_to_word_decoder = {index: word for word, index in decoder_t.word_index.items()}

In [9]:
#Verify dictionary - spot-check that an index maps back to a word of the decoder vocabulary
int_to_word_decoder[15]

'की'

### Function to generate Padded sequences for Input string

In [17]:
def encode_input(input_str):
    """Convert a raw input sentence into a padded index sequence for the encoder.

    The sentence is tokenized with ``encoder_t`` as a one-sentence batch and
    padded at the end ('post') to ``max_encoder_seq_length``.

    Args:
        input_str: the sentence to encode.

    Returns:
        The padded array produced by ``pad_sequences`` for this single sentence.
    """
    pad_sequences = tf.keras.preprocessing.sequence.pad_sequences

    #Words -> vocabulary indexes (wrapped in a list: the tokenizer expects a batch of texts)
    token_ids = encoder_t.texts_to_sequences([input_str])

    #Pad to the fixed encoder length and hand the result straight back
    return pad_sequences(token_ids, maxlen=max_encoder_seq_length, padding='post')

### Prediction Function

In [18]:
import numpy as np

def decode_sentence(input_str):
    """Translate ``input_str`` by greedy, word-by-word decoding.

    The input is encoded with ``encode_input`` and passed through
    ``encoder_model`` to obtain the initial decoder states. ``decoder_model``
    is then called one step at a time, starting from the '<start>' token and
    feeding back the most probable word and the returned states each step.

    Decoding stops when the decoder predicts '<end>', when it predicts an
    index that has no word in ``int_to_word_decoder``, or once
    ``max_decoder_seq_length`` words have been generated.

    Args:
        input_str: the source sentence to translate.

    Returns:
        The predicted words joined by single spaces ('' if nothing was predicted).
    """
    #Convert input string to padded sequence
    input_seq = encode_input(input_str)

    #Get the encoder state values; list() so they can be concatenated with [target_seq]
    decoder_states_value = list(encoder_model.predict(input_seq))

    #Build a sequence with '<start>' - starting sequence for Decoder
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = decoder_t.word_index['<start>']

    #Words predicted so far (joined once at the end)
    predicted_words = []

    #Bound the loop by the number of generated WORDS. The limit is a token count,
    #so it must not be compared with the character length of the sentence.
    while len(predicted_words) < max_decoder_seq_length:

        predicted_outputs, h, c = decoder_model.predict([target_seq] +
                                                        decoder_states_value)

        #Get the predicted output with highest probability
        predicted_output = int(np.argmax(predicted_outputs[0, -1, :]))

        #Get the predicted word from the predicted integer. Indexes without a
        #vocabulary entry (e.g. 0, which Keras tokenizers reserve for padding)
        #yield None instead of raising KeyError.
        predicted_word = int_to_word_decoder.get(predicted_output)

        #Check if prediction should stop
        if predicted_word is None or predicted_word == '<end>':
            break

        #Update predicted sentence
        predicted_words.append(predicted_word)

        #Update target_seq to be the predicted word index
        target_seq[0, 0] = predicted_output

        #Update states value for the next decoder step
        decoder_states_value = [h, c]

    return ' '.join(predicted_words)

### Test Decode function

In [19]:
#Smoke test: translate a short English sentence
decode_sentence("I'm starving.")

'मैं गाता हैं।'

In [13]:
#Smoke test: simple possession sentence
decode_sentence("I have a dog.")

'मेरे पास एक कुत्ता है।'

In [14]:
#Same sentence pattern with a different object word
decode_sentence("I have a car.")

'मेरे पास एक गाड़ी है।'

In [15]:
#Same pattern with a made-up word ("xyz" is presumably not in the encoder vocabulary - TODO confirm)
decode_sentence("I have a xyz.")

'मैं गाता हूँ।'