Importing Libraries

In [6]:
import numpy as np
import pandas as pd
import nltk
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense,LSTM,Input,Attention,TimeDistributed,Dot,Activation
import tensorflow as tf
import random

Loading the eng-spanish translation file

In [7]:
# with open(r"/kaggle/input/eng-spanish/spa.txt",'r', encoding='utf-8') as file:
with open(r"D:\Datasets\spa-eng\spa.txt",'r', encoding='utf-8') as file:
    lines = file.read().split('\n')

In [8]:
input_texts = []
target_texts = []

for line in lines[: min(10000, len(lines) - 1)]:
    line = line.split('\t')
    input_texts.append(line[0])
    line[1] = '\t' + line[1] + '\n'
    target_texts.append(line[1])

Creating corpus for input and target characters

In [9]:
input_characters = set()
target_characters = set()

for sentence in input_texts:
    for char in sentence:
        if char not in input_characters:
            input_characters.add(char)

for sentence in target_texts:
    for char in sentence:
        if char not in target_characters:
            target_characters.add(char)

input_characters = sorted(input_characters)
target_characters = sorted(target_characters)


len_input_tokens = len(input_characters)
len_target_tokens = len(target_characters)

Finding Maximum Sentence Length

In [10]:
max_input_sentence_length = max([len(sent) for sent in input_texts])
max_target_sentence_length = max([len(sent) for sent in target_texts])

print(max_input_sentence_length)
print(max_target_sentence_length)

total_input_sentence = len(input_texts)
total_target_sentence = len(target_texts)

17
42


In [11]:
print("Total input Tokens :: ",len_input_tokens)
print("Total target Tokens :: ",len_target_tokens)
print("max input length :: ",max_input_sentence_length)
print("max target length :: ",max_target_sentence_length)

Total input Tokens ::  71
Total target Tokens ::  86
max input length ::  17
max target length ::  42


In [12]:
encoder_input_data = np.zeros((total_input_sentence,max_input_sentence_length,len_input_tokens),dtype='float32')
decoder_input_data = np.zeros((total_target_sentence,max_target_sentence_length,len_target_tokens),dtype='float32')
decoder_target_data = np.zeros((total_target_sentence,max_target_sentence_length,len_target_tokens),dtype='float32')

Creating a dictionaries for input and target characters

In [13]:
input_token_index = dict((char,i) for i,char in enumerate(input_characters))
target_token_index = dict((char,i) for i,char in enumerate(target_characters))

In [14]:
for i,(input_text,target_text) in enumerate(zip(input_texts,target_texts)):
    for t,char in enumerate(input_text):
        encoder_input_data[i,t,input_token_index[char]] = 1
    encoder_input_data[i,t+1:,input_token_index[' ']] = 1
    for t, char in enumerate(target_text):
        decoder_input_data[i,t,target_token_index[char]] = 1
        if t > 0:
            decoder_target_data[i,t-1,target_token_index[char]] = 1
    decoder_input_data[i,t+1:,target_token_index[' ']] = 1
    decoder_target_data[i,t:,target_token_index[' ']] = 1

In [10]:
combined = list(zip(encoder_input_data,decoder_input_data,decoder_target_data))

random.shuffle(combined)

# Split the shuffled data back into separate arrays
encoder_input_data, decoder_input_data, decoder_target_data = zip(*combined)

# Convert the arrays back to NumPy arrays if needed
encoder_input_data = np.array(encoder_input_data)
decoder_input_data = np.array(decoder_input_data)
decoder_target_data = np.array(decoder_target_data)

In [11]:
decoder_target_data.shape

(10000, 42, 86)

Creating Encoder Decoder Model with Attention

In [12]:
# Encoder
encoder_inputs = Input(shape=(None,len_input_tokens),name="Encoder Input")

encoder_lstm = LSTM(512,return_sequences=True,return_state=True, name="Encoder") # LSTM Layer

encoder_output,state_h,state_c = encoder_lstm(encoder_inputs)

print(encoder_output)

encoder_states = [state_h,state_c]

# Decoder
decoder_inputs = Input(shape=(None,len_target_tokens),name="Decoder Input")

decoder_lstm = LSTM(512, return_sequences= True, return_state=True, name="Decoder")

decoder_output,_,_ = decoder_lstm(decoder_inputs,initial_state=encoder_states)

attention = Attention()([decoder_output,encoder_output])

outputs = tf.concat([decoder_output,attention],axis=-1)

# decoder_lstm_output_dropout = Dropout(0.5)(outputs)

# Dense Layer
decoder_dense = Dense(len_target_tokens,activation='softmax', name="Decoder_Dense_Layer")

dense_time = TimeDistributed(decoder_dense,name="final_layer")

outputs = dense_time(outputs)

KerasTensor(type_spec=TensorSpec(shape=(None, None, 512), dtype=tf.float32, name=None), name='Encoder/PartitionedCall:1', description="created by layer 'Encoder'")


Adding Custom Attention mechanism into the model

In [93]:
# Encoder
encoder_inputs = Input(shape=(None,len_input_tokens),name="Encoder Input")

encoder_lstm = LSTM(512,return_sequences=True,return_state=True, name="Encoder") # LSTM Layer

encoder_output,state_h,state_c = encoder_lstm(encoder_inputs)

encoder_states = [state_h,state_c]

# Decoder
decoder_inputs = Input(shape=(None,len_target_tokens),name="Decoder Input")

decoder_lstm = LSTM(512, return_sequences= True, return_state=True, name="Decoder")

decoder_output,_,_ = decoder_lstm(decoder_inputs,initial_state=encoder_states)

# Custom attention mechanism

dot_layer = Dot(axes=(2,2))([decoder_output,encoder_output])

attention_layer = Activation('softmax')(dot_layer)

attention_vec = Dot(axes=(2,1))([attention_layer,encoder_output])

print(attention.shape)

#-------------------------------------------------------------------------------------------------------------------

outputs = tf.concat([decoder_output,attention_vec],axis=-1)

# decoder_lstm_output_dropout = Dropout(0.5)(outputs)

# Dense Layer
decoder_dense = Dense(len_target_tokens,activation='softmax', name="Decoder_Dense_Layer")

dense_time = TimeDistributed(decoder_dense,name="final_layer")

outputs = dense_time(outputs)

(None, None, None)


In [60]:
model = Model([encoder_inputs,decoder_inputs],outputs)

In [14]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 Encoder Input (InputLayer)  [(None, None, 71)]           0         []                            
                                                                                                  
 Decoder Input (InputLayer)  [(None, None, 86)]           0         []                            
                                                                                                  
 Encoder (LSTM)              [(None, None, 512),          1196032   ['Encoder Input[0][0]']       
                              (None, 512),                                                        
                              (None, 512)]                                                        
                                                                                              

In [61]:
model.compile(optimizer='adam', loss='categorical_crossentropy',metrics=['accuracy'])

model.fit([encoder_input_data,decoder_input_data],decoder_target_data,batch_size=64,epochs=60,validation_split=0.2)

Epoch 1/60
Epoch 2/60
Epoch 3/60

KeyboardInterrupt: 

In [None]:
model.save("/kaggle/working/eng_spa.h5")

  saving_api.save_model(


In [16]:
from tensorflow.keras.models import load_model

saved_model = load_model(r"D:\vs code\python\DeepLearning\Projects\eng_spanish_translation\eng_spa2.h5")
# saved_model = load_model("/kaggle/working/eng_spa.h5")


Creating Custom Encoder Decoder Model

In [17]:
for layer in saved_model.layers:
    print(layer)

<keras.src.engine.input_layer.InputLayer object at 0x0000016F7F2CF350>
<keras.src.engine.input_layer.InputLayer object at 0x0000016F7B6D5A90>
<keras.src.layers.rnn.lstm.LSTM object at 0x0000016F7F378790>
<keras.src.layers.rnn.lstm.LSTM object at 0x0000016F7F3A4950>
<keras.src.layers.merging.dot.Dot object at 0x0000016F7F332110>
<keras.src.layers.core.activation.Activation object at 0x0000016F798D4B50>
<keras.src.layers.merging.dot.Dot object at 0x0000016F7F34F990>
<keras.src.layers.core.tf_op_layer.TFOpLambda object at 0x0000016F7F3A3550>
<keras.src.layers.regularization.dropout.Dropout object at 0x0000016F7F471B10>
<keras.src.layers.rnn.time_distributed.TimeDistributed object at 0x0000016F7F3326D0>


In [18]:
saved_model.summary()

Model: "model_9"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 Encoder Input (InputLayer)  [(None, None, 71)]           0         []                            
                                                                                                  
 Decoder Input (InputLayer)  [(None, None, 86)]           0         []                            
                                                                                                  
 Encoder (LSTM)              [(None, None, 256),          335872    ['Encoder Input[0][0]']       
                              (None, 256),                                                        
                              (None, 256)]                                                        
                                                                                            

In [19]:
# # Encoder
# encoder_input = saved_model.layers[0].input
# encoder_lstm = saved_model.layers[2]
# encoder_output,state_h,state_c = encoder_lstm(encoder_input)
# encoder_states = [state_h,state_c]

# encoder_model = Model(encoder_input,[encoder_output,encoder_states])

# # Decoder

# decoder_initial_state_h = Input(shape=(512,))
# decoder_initial_state_c = Input(shape=(512,))
# encoder_output_2 = Input(shape=(512,))

# decoder_initial_states = [decoder_initial_state_h,decoder_initial_state_c]

# decoder_input = saved_model.layers[1].input
# decoder_lstm = saved_model.layers[3]
# decoder_output,state_h,state_c = decoder_lstm(decoder_input, initial_state=decoder_initial_states)

# decoder_states=[state_h,state_c]

# attention_layer = saved_model.layers[4]

# # attention = attention_layer
# attention = attention_layer([decoder_output,encoder_output_2])

# concatenation_layer = saved_model.layers[5]

# outputs = concatenation_layer([decoder_output,attention],axis=-1)

# dense_layer = saved_model.layers[6]

# outputs = dense_layer(outputs)

# decoder_model = Model([decoder_input,encoder_output_2,decoder_initial_states],[outputs] + decoder_states)

Implementing Attention on Inference Model

In [57]:
# Encoder

encoder_inputs = saved_model.layers[0].input
encoder_output,state_h,state_c = saved_model.layers[2](encoder_inputs)  # encoder lstm
encoder_states = [state_h,state_c]  # encoder states

encoder_model = Model(encoder_inputs,[encoder_output,encoder_states])

# decoder

decoder_state_h = Input(shape=(256,))
decoder_state_c = Input(shape=(256,))

decoder_initial_states = [decoder_state_h,decoder_state_c]

decoder_inputs = saved_model.layers[1].input
decoder_output,decoder_state_h,decoder_state_c = saved_model.layers[3](decoder_inputs,initial_state=decoder_initial_states)  # decoder lstm
decoder_states = [decoder_state_h,decoder_state_c]  # decoder states

# Implementing Attention

dot_product = saved_model.layers[4]([decoder_output,encoder_output])

attention = saved_model.layers[5](dot_product)

attention_vec = saved_model.layers[6]([attention,encoder_output])

context_vector = saved_model.layers[7]([decoder_output,attention_vec],axis=-1)

dropout = saved_model.layers[8](context_vector)

outputs = saved_model.layers[9](dropout)

In [58]:
decoder_model = Model([decoder_inputs,decoder_initial_states,encoder_output],[outputs,decoder_states])

In [59]:
reverse_target_index = dict((i,char) for char,i in target_token_index.items())

In [62]:
def decode(sentence):
#     sentence_encoded = np.zeros((1,max_input_length,len_input_tokens))
    
# # one hot encoding the input sentence
#     for i, char in enumerate(sentence):
#         sentence_encoded[0,i,input_token_index[char]] = 1
#     sentence_encoded[0,i+1:,input_token_index[' ']] = 1
    
# predicting the encoder states
    encoder_output,states = encoder_model.predict(sentence)
    decoded_sentence = ""
    decoded_input = np.zeros((1,1,len_target_tokens))
    decoded_input[0,0,target_token_index['\t']] = 1
    sample_character = ""
    stop_condition = False
    while not stop_condition:
        # output,state_h,state_c = decoder_model.predict([decoded_input,states,encoder_output])
        output,state_n = decoder_model.predict([decoded_input,states,encoder_output])
        token_index = np.argmax(output)
        sample_character = reverse_target_index[token_index]
        decoded_sentence += sample_character
        if sample_character == '\n' or len(decoded_sentence) > max_target_sentence_length:
            stop_condition = True
        decoded_input = np.zeros((1,1,len_target_tokens))
        decoded_input[0,0,token_index] = 1
        states = state_n
    return decoded_sentence


op = []

for seq_index in range(10):
    # Take one sequence (part of the training set)
    # for trying out decoding.
    input_seq = encoder_input_data[seq_index: seq_index + 1]
    decoded_sentence = decode(input_seq)
    print('-')
    # print('Input sentence:', input_texts[seq_index])
    # print('Decoded sentence:', decoded_sentence)
    op.append([input_texts[seq_index],decoded_sentence])

-
-
-
-
-
-
-
-
-
-


In [63]:
for i in op:
    print(i)

['Go.', 'Vete.\n']
['Go.', 'Vete.\n']
['Go.', 'Vete.\n']
['Go.', 'Vete.\n']
['Hi.', 'Hola.\n']
['Run!', '¡Corre!\n']
['Run.', 'Corred.\n']
['Who?', '¿Quieá el golpao?\n']
['Fire!', '¡Incendio!\n']
['Fire!', '¡Incendio!\n']
