In [11]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model, load_model 
from tensorflow.keras.layers import Input

def get_lookup (df):
    token2id = {}
    id2token = {}
    count = 0
    prob = []
    counts = df['count'].to_list()
    norm = np.sum (counts)
    prob = [p /norm for p in counts]
    
    for index, row in df.iterrows():
        token2id [row['word']] = count
        id2token [count] = row['word']
        count+=1
    return token2id, id2token, prob      


def text2vec (input_text, df_vocab, vocab_size, vec_len):
    D = df_vocab.iloc[:vocab_size]
    ids = [i for i in range (0, len (D))]
    D = D.assign (id=ids)
    D.index = D['word'].to_list()
    text_vecs = []
    for text in input_text:
        words = text.split(" ")
        words = [w for w in words if w in D.index]
        vec = [D.loc[w]['id'] for w in words]
        # do padding 
        if len (vec) > vec_len:
            vec = vec[:vec_len]
        if len (vec) < vec_len:
            vec = vec + [0] * (vec_len - len(vec))
        text_vecs.append (vec)
    return np.array (text_vecs)     


class Infr_model :
    def __init__(self, model_path, num_input_tokens, num_output_tokens, latent_dim, vocab_in, vocab_out):
        self.model_path = model_path 
        self.latent_dim = latent_dim 
        self.num_input_tokens  = num_input_tokens
        self.num_output_tokens = num_output_tokens 
        self.model = load_model(model_path)
        self.vocab_in = vocab_in 
        self.vocab_out = vocab_out 

       # Get the encoder model 
        encoder_inputs = Input(shape=(None, num_input_tokens),name='Encoder-Input-Layer')
        encoder_lstm = self.model.get_layer('Encoder-LSTM-Layer')
        encoder_outputs, state_h, state_c = encoder_lstm(encoder_inputs)
        self.encoder_model  = Model (encoder_inputs, encoder_outputs)

        print(self.encoder_model.summary())
               
        decoder_inputs = Input(shape=(None, num_output_tokens),name='Decoder-Input-Layer')
        
        decoder_state_input_h = Input(shape=(self.latent_dim,))
        decoder_state_input_c = Input(shape=(self.latent_dim,))

        decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
        decoder_lstm = self.model.get_layer ('Decoder-LSTM-Layer')
        decoder_dense = self.model.get_layer ('Decoder-Dense-Layer')
        
        decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
        decoder_states = [state_h, state_c]
        decoder_outputs = decoder_dense(decoder_outputs)

        self.decoder_model = Model([decoder_inputs] + decoder_states_inputs,[decoder_outputs] + decoder_states, name='Decoder-Model')
        
        print(self.decoder_model.summary())
        
    def predict (self, inp_vecs):
        max_decoder_seq_len = 10 

        enc_token2id, enc_id2token, enc_prob = get_lookup (self.vocab_in)
        dec_token2id, dec_id2token, dec_prob = get_lookup (self.vocab_out)
     
        dec_token2id['__START__'] = 1
          
        states_value = self.encoder_model.predict(inp_vecs)
        print("h=",states_value) 
        
        #Generate empty target sequence of length 1.
        target_seq = np.zeros((1, self.num_output_tokens))

        #Get the first character of target sequence with the start character.
        target_seq[0, dec_token2id['__START__']] = 1.

        #Sampling loop for a batch of sequences
        #(to simplify, here we assume a batch of size 1).
    
        stop_condition = False
        decoded_sentence = ' '
    
        while not stop_condition:
            print("target_seq:",target_seq)
            print("staes_value=",states_value)
            output_tokens, h, c = self.decoder_model.predict([target_seq] + states_value)
            preds  =  output_tokens[0, 0,:]
            #preds  = [preds[i] / dec_prob[i] for i in range (0, len (preds))]
            print("preds=",preds)
        
            #Sample a token
            sampled_token_index = np.argmax(preds)
        
            sampled_word = dec_id2token[sampled_token_index]
            #if sampled_word not in decoded_sentence.split(" ") or np.random.random([10])[0] > 0.25:  
            #    decoded_sentence = decoded_sentence + "  " +  sampled_word
            #print("decoced sent:",decoded_sentence)

            #if len(decoded_sentence.split(" ")) > max_decoder_seq_len :
            #    stop_condition = True 
            #if sampled_word == '__STOP__':
            #    stop_condition = True 
            #Update the target sequence (of length 1).
            target_seq = np.zeros((1, 1, num_decoder_tokens))
            target_seq[0, 0, sampled_token_index] = 1.
            #Update states
            states_value = [h, c]
   
        return decoded_sentence  




if __name__ == "__main__":

     root_path = r"C:\Users\jayanti.prasad\Projects-Dev\Seq2Seq\tmp"
     model_path = r"C:\Users\jayanti.prasad\Projects-Dev\Seq2Seq\tmp\trained_model\model.hdf5"
     df = pd.read_csv(r"C:\Users\jayanti.prasad\Data\NLP_DATA\seq2seq_data\english-french-sentence-pairs.csv",encoding='utf-8')


     num_input_tokens = 210
     num_output_tokens = 172
     latent_dim = 60 

     df_inp = pd.read_csv (root_path + os.sep +"vocab_inp.csv",encoding='utf-8')
     df_out = pd.read_csv (root_path + os.sep + "vocab_out.csv", encoding='utf-8')

     df_inp = df_inp [df_inp ['count'] > 10]
     df_out = df_out [df_out ['count'] > 10] 
    
     I = Infr_model (model_path, num_input_tokens, num_output_tokens, latent_dim, df_inp, df_out) 

     texts = []
     for i in  [11,56, 87, 98]:
         text = df.iloc[i]['en']
         texts.append (text)

     input_vecs = text2vec (texts, df_inp, num_input_tokens, 8 )

     encoder_in_data = np.zeros((len(input_vecs), 8, num_input_tokens), dtype='float32')


     for i in range (0, len (input_vecs)):
           for j, token_id in enumerate (input_vecs[i]):
                encoder_in_data[i, j, token_id] = 1
    
     print(encoder_in_data)
    
     I.predict (encoder_in_data)
    

Model: "model_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Encoder-Input-Layer (Input  [(None, None, 210)]       0         
 Layer)                                                          
                                                                 
 Encoder-LSTM-Layer (LSTM)   [(None, 60),              65040     
                              (None, 60),                        
                              (None, 60)]                        
                                                                 
Total params: 65040 (254.06 KB)
Trainable params: 65040 (254.06 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Model: "Decoder-Model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to           

ValueError: operands could not be broadcast together with shapes (1,1,172) (4,60) 