In [1]:
import numpy as np
from keras.layers import LSTM,Input,Dense
from keras.models import Model
import random
from discordwebhook import Discord
from keras.models import load_model

Loading the Dataset

In [2]:
# Vectorize the data.
# Read the tab-separated sentence pairs and collect, for each side, the list
# of sentences and the set of distinct characters that occur in them.
input_texts = []
target_texts = []
input_characters = set()
target_characters = set()
with open(r"D:\Datasets\fra-eng\fra.txt", 'r', encoding='utf-8') as f:
    lines = f.read().split('\n')
# Use at most the first 10000 lines; `len(lines) - 1` leaves out the last
# element produced by the split.
for line in lines[: min(10000, len(lines) - 1)]:
    # Only the first two columns are used. Slicing (instead of unpacking
    # exactly three fields) discards any further column and also accepts
    # files that only contain the two sentence columns.
    input_text, target_text = line.split('\t')[:2]
    # We use "tab" as the "start sequence" character
    # for the targets, and "\n" as "end sequence" character.
    target_text = '\t' + target_text + '\n'
    input_texts.append(input_text)
    target_texts.append(target_text)
    # set.update adds every character of the string; duplicates are ignored.
    input_characters.update(input_text)
    target_characters.update(target_text)

In [3]:
# Spot-check one aligned sentence pair (row 150): source first, then target.
for corpus in (input_texts, target_texts):
    print(corpus[150])

Try it.
	Essayez.



In [4]:
# Character vocabularies in a stable, sorted order.
english_characters = list(input_characters)
english_characters.sort()
french_characters = list(target_characters)
french_characters.sort()

# Vocabulary sizes (width of the one-hot axis on each side).
num_encoder_tokens, num_decoder_tokens = len(english_characters), len(french_characters)

# Longest sentence, in characters, on each side.
max_english_sentence_len = max(map(len, input_texts))
max_french_sentence_len = max(map(len, target_texts))

# Number of sentences collected on each side.
num_pairs_english, num_pairs_french = len(input_texts), len(target_texts)

In [5]:
# Report the vocabulary sizes and the longest sentence on each side.
summary_rows = (
    ("Total English Characters : ", num_encoder_tokens),
    ("Total French Characters : ", num_decoder_tokens),
    ("Maximum English Sentences Length : ", max_english_sentence_len),
    ("Maximum French Sentences Length : ", max_french_sentence_len),
)
for label, value in summary_rows:
    print(label, value)

Total English Characters :  70
Total French Characters :  93
Maximum English Sentences Length :  14
Maximum French Sentences Length :  59


In [6]:
# Zero-initialised float32 tensors indexed as
# (sentence pair, character position, vocabulary entry).
encoder_shape = (num_pairs_english, max_english_sentence_len, num_encoder_tokens)
decoder_shape = (num_pairs_french, max_french_sentence_len, num_decoder_tokens)

encoder_input_data = np.zeros(encoder_shape, dtype='float32')
decoder_input_data = np.zeros(decoder_shape, dtype='float32')
# Same shape and dtype as the decoder input.
decoder_output_data = np.zeros_like(decoder_input_data)

In [7]:
# NOTE(review): at this point the three tensors are still all zeros (they are
# only populated by the one-hot fill loop further down), so this shuffle does
# not reorder any real data. If the shuffle is ever moved after the fill,
# input_texts / target_texts must be reordered with the same permutation,
# because the decoding demo pairs encoder_input_data[i] with input_texts[i].
#
# The permutation is drawn with random.shuffle exactly as before, but on a
# list of row indices, which avoids zipping every row into Python tuples and
# re-stacking them into new arrays.
row_order = list(range(len(encoder_input_data)))
random.shuffle(row_order)

# Apply the same permutation to all three arrays so rows stay aligned.
encoder_input_data = encoder_input_data[row_order]
decoder_input_data = decoder_input_data[row_order]
decoder_output_data = decoder_output_data[row_order]

In [8]:
# Show the shape of each tensor: encoder input, decoder input, decoder output.
for tensor in (encoder_input_data, decoder_input_data, decoder_output_data):
    print(tensor.shape)


(10000, 14, 70)
(10000, 59, 93)
(10000, 59, 93)


In [9]:
# Character -> position in the sorted vocabulary (the one-hot column).
input_index = {char: position for position, char in enumerate(english_characters)}
target_index = {char: position for position, char in enumerate(french_characters)}

# Inverse lookups: position -> character.
reverse_input_index = dict(enumerate(english_characters))
reverse_target_index = dict(enumerate(french_characters))

In [10]:
# Display the English character -> one-hot column mapping.
input_index

{' ': 0,
 '!': 1,
 '"': 2,
 '$': 3,
 '%': 4,
 '&': 5,
 "'": 6,
 ',': 7,
 '-': 8,
 '.': 9,
 '0': 10,
 '1': 11,
 '2': 12,
 '3': 13,
 '5': 14,
 '7': 15,
 '8': 16,
 '9': 17,
 ':': 18,
 '?': 19,
 'A': 20,
 'B': 21,
 'C': 22,
 'D': 23,
 'E': 24,
 'F': 25,
 'G': 26,
 'H': 27,
 'I': 28,
 'J': 29,
 'K': 30,
 'L': 31,
 'M': 32,
 'N': 33,
 'O': 34,
 'P': 35,
 'Q': 36,
 'R': 37,
 'S': 38,
 'T': 39,
 'U': 40,
 'V': 41,
 'W': 42,
 'Y': 43,
 'a': 44,
 'b': 45,
 'c': 46,
 'd': 47,
 'e': 48,
 'f': 49,
 'g': 50,
 'h': 51,
 'i': 52,
 'j': 53,
 'k': 54,
 'l': 55,
 'm': 56,
 'n': 57,
 'o': 58,
 'p': 59,
 'q': 60,
 'r': 61,
 's': 62,
 't': 63,
 'u': 64,
 'v': 65,
 'w': 66,
 'x': 67,
 'y': 68,
 'z': 69}

In [11]:
# Display the French character -> one-hot column mapping
# (it also contains the '\t' start and '\n' end markers).
target_index

{'\t': 0,
 '\n': 1,
 ' ': 2,
 '!': 3,
 '%': 4,
 '&': 5,
 "'": 6,
 '(': 7,
 ')': 8,
 ',': 9,
 '-': 10,
 '.': 11,
 '0': 12,
 '1': 13,
 '2': 14,
 '3': 15,
 '5': 16,
 '8': 17,
 '9': 18,
 ':': 19,
 '?': 20,
 'A': 21,
 'B': 22,
 'C': 23,
 'D': 24,
 'E': 25,
 'F': 26,
 'G': 27,
 'H': 28,
 'I': 29,
 'J': 30,
 'K': 31,
 'L': 32,
 'M': 33,
 'N': 34,
 'O': 35,
 'P': 36,
 'Q': 37,
 'R': 38,
 'S': 39,
 'T': 40,
 'U': 41,
 'V': 42,
 'W': 43,
 'Y': 44,
 'a': 45,
 'b': 46,
 'c': 47,
 'd': 48,
 'e': 49,
 'f': 50,
 'g': 51,
 'h': 52,
 'i': 53,
 'j': 54,
 'k': 55,
 'l': 56,
 'm': 57,
 'n': 58,
 'o': 59,
 'p': 60,
 'q': 61,
 'r': 62,
 's': 63,
 't': 64,
 'u': 65,
 'v': 66,
 'w': 67,
 'x': 68,
 'y': 69,
 'z': 70,
 '\xa0': 71,
 '«': 72,
 '»': 73,
 'À': 74,
 'Ç': 75,
 'É': 76,
 'Ê': 77,
 'à': 78,
 'â': 79,
 'ç': 80,
 'è': 81,
 'é': 82,
 'ê': 83,
 'î': 84,
 'ï': 85,
 'ô': 86,
 'ù': 87,
 'û': 88,
 'œ': 89,
 '\u2009': 90,
 '’': 91,
 '\u202f': 92}

In [12]:
# Populate the one-hot tensors, one sentence pair per row.
# Positions past the end of a sentence are padded with the space character.
space_in = input_index[' ']
space_out = target_index[' ']

for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    for t, char in enumerate(input_text):
        encoder_input_data[i, t, input_index[char]] = 1
    # Pad from the first unused position. len() is used instead of the loop
    # variable so an empty sentence cannot pick up a stale `t`.
    encoder_input_data[i, len(input_text):, space_in] = 1

    for t, char in enumerate(target_text):
        decoder_input_data[i, t, target_index[char]] = 1
        if t > 0:
            # decoder_output_data is decoder_input_data shifted one step
            # earlier, so it does not contain the leading start character.
            decoder_output_data[i, t - 1, target_index[char]] = 1
    decoder_input_data[i, len(target_text):, space_out] = 1
    # The shifted output row is one character shorter, so its padding starts
    # one position earlier (clamped at 0 for safety).
    decoder_output_data[i, max(len(target_text) - 1, 0):, space_out] = 1


Encoder Decoder Model

In [13]:
# Training-time encoder/decoder graph. It is kept commented out because the
# notebook loads an already-trained model from disk instead of re-training.
# Encoder: an LSTM whose final hidden/cell states seed the decoder.
# encoder_inputs = Input(shape=(None,num_encoder_tokens),name="encoder input")
# encoder = LSTM(256,return_state=True,name="encoder")
# encoder_outputs,state_h,state_c = encoder(encoder_inputs)
# encoder_states = [state_h,state_c]


# Decoder: an LSTM started from the encoder states, followed by a softmax
# Dense layer over the target character vocabulary.
# decoder_inputs = Input(shape=(None,num_decoder_tokens),name="decoder input")
# decoder = LSTM(256,return_sequences=True,return_state=True,name="decoder")
# decoder_outputs,_,_ = decoder(decoder_inputs,initial_state=encoder_states)
# decoder_dense = Dense(num_decoder_tokens,name="decoder_Dense",activation='softmax')
# decoder_outputs = decoder_dense(decoder_outputs)

In [14]:
# model = Model([encoder_inputs,decoder_inputs],decoder_outputs)

In [15]:
# model.compile(optimizer='rmsprop',loss='categorical_crossentropy',metrics=['accuracy'])

In [16]:
# model.fit([encoder_input_data, decoder_input_data], decoder_output_data,
#           batch_size=64,
#           epochs=100,
#           validation_split=0.2,verbose=1)

In [17]:
# model.save(r"D:\vs code\python\DeepLearning\Projects\LanguageTranslation\eng_fre4.h5")

In [18]:
# Restore the trained network from disk.
# NOTE(review): the commented-out save call in this notebook writes
# "eng_fre4.h5" while this cell loads "eng_fre5.h5"; confirm which
# checkpoint is intended.
trained_model_path = r"D:\vs code\python\DeepLearning\Projects\LanguageTranslation\eng_fre5.h5"
model_1 = load_model(trained_model_path)

In [19]:
# List the layers in order; the inference cell later selects them by position.
for keras_layer in model_1.layers:
    print(keras_layer)

<keras.src.engine.input_layer.InputLayer object at 0x00000183EDE50050>
<keras.src.engine.input_layer.InputLayer object at 0x00000183F5A7EF50>
<keras.src.layers.rnn.lstm.LSTM object at 0x00000183EDEA5390>
<keras.src.layers.rnn.lstm.LSTM object at 0x00000183D625D8D0>
<keras.src.layers.core.dense.Dense object at 0x00000183F5A8CC90>


In [20]:
# Print the layer names, output shapes and parameter counts of the loaded model.
model_1.summary()

Model: "model_9"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 encoder input (InputLayer)  [(None, None, 70)]           0         []                            
                                                                                                  
 decoder input (InputLayer)  [(None, None, 93)]           0         []                            
                                                                                                  
 encoder (LSTM)              [(None, 256),                334848    ['encoder input[0][0]']       
                              (None, 256),                                                        
                              (None, 256)]                                                        
                                                                                            

In [21]:
# Define sampling (inference) models that reuse the trained layers of model_1.
# Layers are selected by position, following the order printed by the layer
# listing: [0] encoder input, [1] decoder input, [2] and [3] the two LSTMs
# (encoder, then decoder), [4] the output Dense layer.
# NOTE(review): positional lookup breaks if a checkpoint with a different
# layer order is loaded; verify against model_1.summary().
encoder = model_1.layers[2]
decoder_lstm = model_1.layers[3]
decoder_dense = model_1.layers[4]
decoder_inputs = model_1.layers[1].input

# Encoder model: one-hot source sequence -> final [hidden, cell] states.
encoder_inputs = Input(shape=(None, num_encoder_tokens), name="encoder input")
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
encoder_states = [state_h, state_c]

encoder_model = Model(encoder_inputs, encoder_states)

# Decoder model: (target input, previous states) -> (char probabilities, new states).
# The state width is read from the trained layer rather than hard-coded, so
# it always matches the loaded checkpoint.
decoder_state_h = Input(shape=(decoder_lstm.units,))
decoder_state_c = Input(shape=(decoder_lstm.units,))
decoder_initial_states = [decoder_state_h, decoder_state_c]

decoder_output, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_initial_states)
decoder_states = [state_h, state_c]

decoder_outputs = decoder_dense(decoder_output)

decoder_model = Model([decoder_inputs] + decoder_initial_states, [decoder_outputs] + decoder_states)

# Index -> character lookups used to turn predictions back into text.
reverse_input_char_index = dict(
    (i, char) for char, i in input_index.items())
reverse_target_char_index = dict(
    (i, char) for char, i in target_index.items())

In [22]:
def decode_sequence(input_seq):
    """Greedily decode one source sequence into target text.

    The source is run through ``encoder_model`` once; ``decoder_model`` is
    then stepped one character at a time, always feeding back the most
    probable character together with the returned states.

    Parameters
    ----------
    input_seq
        Passed unchanged to ``encoder_model.predict``. The decoder is driven
        with a batch of one, so this is expected to hold a single sequence
        (the demo cell passes a one-row slice of ``encoder_input_data``).

    Returns
    -------
    str
        The sampled characters in order, including the terminating newline
        character when one was produced. Decoding stops after a newline or
        once the text is longer than ``max_french_sentence_len``.
    """
    # verbose=0: predict is called once per generated character, so any
    # per-call progress output would swamp the notebook.
    states_value = encoder_model.predict(input_seq, verbose=0)

    # Start decoding from the start-of-sequence character (tab).
    target_sequence = np.zeros((1, 1, num_decoder_tokens))
    target_sequence[0, 0, target_index['\t']] = 1

    decoded_sentence = ''
    while True:
        output, h, c = decoder_model.predict([target_sequence] + states_value, verbose=0)

        # Greedy choice: most probable character at the last time step.
        output_token_index = np.argmax(output[0, -1, :])
        sampled_char = reverse_target_char_index[output_token_index]
        decoded_sentence += sampled_char

        if sampled_char == '\n' or len(decoded_sentence) > max_french_sentence_len:
            break

        # Feed the sampled character and the new states into the next step.
        target_sequence = np.zeros((1, 1, num_decoder_tokens))
        target_sequence[0, 0, output_token_index] = 1
        states_value = [h, c]
    return decoded_sentence


In [25]:
# Decode the first 50 rows of encoder_input_data and print each result
# next to the source sentence stored at the same index.
for seq_index in range(50):
    # A length-1 slice (rather than a plain index) keeps the leading batch axis.
    one_row = slice(seq_index, seq_index + 1)
    input_seq = encoder_input_data[one_row]
    decoded_sentence = decode_sequence(input_seq)
    report = (
        ('-',),
        ('Input sentence:', input_texts[seq_index]),
        ('Decoded sentence:', decoded_sentence),
    )
    for fields in report:
        print(*fields)

-
Input sentence: Go.
Decoded sentence: Pars !

-
Input sentence: Go.
Decoded sentence: Pars !

-
Input sentence: Go.
Decoded sentence: Pars !

-
Input sentence: Go.
Decoded sentence: Pars !

-
Input sentence: Hi.
Decoded sentence: Salut !

-
Input sentence: Hi.
Decoded sentence: Salut !

-
Input sentence: Run!
Decoded sentence: Fuyez !

-
Input sentence: Run!
Decoded sentence: Fuyez !

-
Input sentence: Run!
Decoded sentence: Fuyez !

-
Input sentence: Run!
Decoded sentence: Fuyez !

-
Input sentence: Run!
Decoded sentence: Fuyez !

-
Input sentence: Run!
Decoded sentence: Fuyez !

-
Input sentence: Run!
Decoded sentence: Fuyez !

-
Input sentence: Run!
Decoded sentence: Fuyez !

-
Input sentence: Run.
Decoded sentence: Fuyons !

-
Input sentence: Run.
Decoded sentence: Fuyons !

-
Input sentence: Run.
Decoded sentence: Fuyons !

-
Input sentence: Run.
Decoded sentence: Fuyons !

-
Input sentence: Run.
Decoded sentence: Fuyons !

-
Input sentence: Run.
Decoded sentence: Fuyons !

-
In

In [24]:
# SECURITY: a complete Discord webhook URL (webhook id + secret token) was
# hard-coded in this commented-out cell. It has been redacted here; treat the
# old webhook as leaked and regenerate it, since it remains in file history.
# Read the URL from the environment instead (requires `import os`):
# discord = Discord(url=os.environ["DISCORD_WEBHOOK_URL"])
# discord.post(content="Process Completed")