In [47]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense



class Seq2Seq_Model:
    def __init__(self, **kwargs):
        self.num_encoder_tokens = kwargs ['num_encoder_tokens']
        self.num_decoder_tokens = kwargs ['num_decoder_tokens']
        self.latent_dim = kwargs['latent_dim']
        self.build_model() 
        self.build_encoder_decoder_models()
        
    
    def build_model (self, ):
        # Define an input sequence and process it.
        self.encoder_inputs = Input(shape=(None, self.num_encoder_tokens))
        encoder = LSTM(self.latent_dim, return_state=True,name='LSTM-1')
        encoder_outputs, state_h, state_c = encoder(self.encoder_inputs)
        # We discard `encoder_outputs` and only keep the states.
        self.encoder_states = [state_h, state_c]


        # Set up the decoder, using `encoder_states` as initial state.
        self.decoder_inputs = Input(shape=(None, self.num_decoder_tokens))
        # We set up our decoder to return full output sequences,
        # and to return internal states as well. We don't use the 

        # return states in the training model, but we will use them in inference.
        decoder_lstm = LSTM(self.latent_dim, return_sequences=True, return_state=True,name='LSTM-2')
        decoder_outputs, _, _ = decoder_lstm(self.decoder_inputs,
                                     initial_state=self.encoder_states)
        self.decoder_dense = Dense(self.num_decoder_tokens, activation='softmax')
        decoder_outputs = self.decoder_dense(decoder_outputs)

        # Define the model that will turn
        # `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
        self.model = Model([self.encoder_inputs, self.decoder_inputs], decoder_outputs, name='Seq2Seq-Model')
        
        print(self.model.summary())

    def build_encoder_decoder_models (self,):
        self.encoder_model = Model(self.encoder_inputs, self.encoder_states,name='Encoder-Model')

        decoder_state_input_h = Input(shape=(self.latent_dim,))
        decoder_state_input_c = Input(shape=(self.latent_dim,))

        decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
        decoder_outputs, state_h, state_c = decoder_lstm(self.decoder_inputs, initial_state=decoder_states_inputs)

        decoder_states = [state_h, state_c]
        decoder_outputs = self.decoder_dense(decoder_outputs)

        self.decoder_model = Model([self.decoder_inputs] + decoder_states_inputs,[decoder_outputs] + decoder_states, name='Decoder-Model')
       
       

    
    def train_model (self, X, y, batch_size, epochs):
        encoder_input_data = X
        decoder_input_data = Y[:, :-1]
        decoder_target_data = Y[:, 1:]
        

        # Run training
        self.model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
        hist = self.model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=batch_size, epochs=epochs, validation_split=0.2)
        return hist 
        
 
def decode_sequence(input_seq):
    # Encode the input as state vectors.
    states_value = encoder_model.predict(input_seq)

    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1, 1, num_decoder_tokens))
    # Populate the first character of target sequence with the start character.
    target_seq[0, 0, target_token_index['\t']] = 1.

    # Sampling loop for a batch of sequences
    # (to simplify, here we assume a batch of size 1).
    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict(
            [target_seq] + states_value)

        # Sample a token
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_char_index[sampled_token_index]
        decoded_sentence += sampled_char

        # Exit condition: either hit max length
        # or find stop character.
        if (sampled_char == '\n' or
           len(decoded_sentence) > max_decoder_seq_length):
            stop_condition = True

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, sampled_token_index] = 1.

        # Update states
        states_value = [h, c]

    return decoded_sentence


if __name__ == "__main__":

     S = Seq2Seq_Model(num_encoder_tokens = 20, num_decoder_tokens = 10, latent_dim = 60)

     print(S.encoder_model.summary())
     print(S.decoder_model.summary())

     num_encoder_tokens = 1000
     num_decoder_tokens = 2000

     #X  = np.random.randint (0, num_encoder_tokens-1, size = (1000, 20))
     #Y  = np.random.randint (0, num_encoder_tokens-1, size = (1000, 30))

     X = np.random.random ([1000,20])
     Y = np.random.random ([1000,10])
    

    
     print("Input data", X.shape, Y.shape)
    
     hist = S.train_model (X, Y, 60, 60)
     





Model: "Seq2Seq-Model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_97 (InputLayer)       [(None, None, 20)]           0         []                            
                                                                                                  
 input_98 (InputLayer)       [(None, None, 10)]           0         []                            
                                                                                                  
 LSTM-1 (LSTM)               [(None, 60),                 19440     ['input_97[0][0]']            
                              (None, 60),                                                         
                              (None, 60)]                                                         
                                                                                      

ValueError: Exception encountered when calling layer "lstm_3" (type LSTM).

Dimensions must be equal, but are 10 and 2000 for '{{node MatMul}} = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false](strided_slice_1, kernel)' with input shapes: [?,10], [2000,240].

Call arguments received by layer "lstm_3" (type LSTM):
  • inputs=['tf.Tensor(shape=(None, None, 10), dtype=float32)', 'tf.Tensor(shape=(None, 60), dtype=float32)', 'tf.Tensor(shape=(None, 60), dtype=float32)']
  • mask=None
  • training=None
  • initial_state=None