In [None]:
from pickle import load
from pickle import dump
from collections import Counter
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, GRU, TimeDistributed, Dropout, Embedding, Bidirectional, LSTM
from tensorflow.keras.models import load_model
import numpy as np

In [None]:
def load_data(path):
    """Read a text file and return its contents as a list of lines.

    Args:
        path: Location of the text file to read.

    Returns:
        list[str]: The file contents split on newline characters. A file
        that ends with a newline therefore yields a trailing empty string.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding (e.g. cp1252 on Windows), which would mis-decode non-ASCII
    # text. NOTE(review): assumes the corpus files are UTF-8 encoded.
    with open(path, encoding = 'utf-8') as f:
        data = f.read()
    return data.split('\n')

In [None]:
# Load the English and French corpora (presumably line-aligned translations).
# Forward slashes are used because they resolve on Windows as well as on
# POSIX systems, whereas a backslash-separated path only works on Windows.
en_sentences = load_data('data/small_vocab_en')
fr_sentences = load_data('data/small_vocab_fr')

In [None]:
def tokenize_text(sentences):
    """Fit a fresh tokenizer on ``sentences`` and encode them as id sequences.

    Args:
        sentences: Iterable of text strings to fit on and to encode.

    Returns:
        tuple: ``(sequences, word_index, tokenizer)`` where ``sequences`` is
        the encoded form of ``sentences``, ``word_index`` is the tokenizer's
        word -> id mapping, and ``tokenizer`` is the fitted ``Tokenizer``.
    """
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(sentences)
    encoded = tokenizer.texts_to_sequences(sentences)
    return encoded, tokenizer.word_index, tokenizer

# en_sequences, en_word_index = tokenize_text(en_sentences)
# print(en_sequences[0],'\n', en_word_index)

In [None]:
def pad_sequence(sequences, length = None):
    """Pad ``sequences`` at the end ('post') to a common length.

    Args:
        sequences: Sequences of integer ids to pad.
        length: Forwarded as ``maxlen`` to ``pad_sequences``; ``None`` leaves
            the choice of length to that function.

    Returns:
        The array produced by ``pad_sequences``.
    """
    padded = pad_sequences(sequences, padding = 'post', maxlen = length)
    return padded
# en_Psequences = pad_sequence(en_sequences)
# en_Psequences.shape[1:]

In [None]:
def preprocess_sequence(sentences, pad_size = None):
    """Tokenize ``sentences`` and post-pad the resulting id sequences.

    Args:
        sentences: Iterable of text strings.
        pad_size: Optional target length handed to ``pad_sequence``.

    Returns:
        tuple: ``(padded_sequences, word_index, tokenizer)``.
    """
    encoded, vocabulary, tokenizer = tokenize_text(sentences)
    return pad_sequence(encoded, pad_size), vocabulary, tokenizer


# Encode and pad both corpora, keeping each language's vocabulary and tokenizer.
en_Psequences, en_word_index, en_token = preprocess_sequence(en_sentences)
fr_Psequences, fr_word_index, fr_token = preprocess_sequence(fr_sentences)

# Append a trailing axis of size 1 to the French targets.
fr_Psequences = fr_Psequences.reshape(fr_Psequences.shape + (1,))





### Model: Bidirectional GRU with a custom Embedding layer

In [None]:
def GRU_with_EB(en_vocab_size, input_shape, fr_vocab_size):
    """Build and compile an Embedding -> Bidirectional GRU -> Dense model.

    Args:
        en_vocab_size: Size of the source vocabulary; one is added to it for
            the embedding's input dimension (presumably to leave room for
            the padding id 0 -- confirm).
        input_shape: Shape tuple of the padded input array; element 1 is used
            as the sequence length and ``input_shape[1:]`` as the per-sample
            input shape.
        fr_vocab_size: Size of the target vocabulary; the softmax output has
            ``fr_vocab_size + 1`` units.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    sequence_length = input_shape[1]
    layers = [
        Embedding(
            input_dim = en_vocab_size + 1,
            output_dim = 10,
            input_length = sequence_length,
            input_shape = input_shape[1:],
        ),
        # return_sequences=True keeps one output vector per timestep.
        Bidirectional(GRU(128, return_sequences = True)),
        TimeDistributed(Dense(128, activation = 'relu')),
        Dropout(0.4),
        TimeDistributed(Dense(fr_vocab_size + 1, activation = 'softmax')),
    ]

    network = Sequential(layers)
    network.compile(
        optimizer = 'Adam',
        loss = 'sparse_categorical_crossentropy',
        metrics = ['accuracy'],
    )
    return network
    
    
    
# Pad the English inputs to the French sequence length: the model returns one
# output per input timestep, so inputs and targets need the same length.
target_length = fr_Psequences.shape[1]
en_Psequences = pad_sequence(en_Psequences, target_length)

GRU_EB_model = GRU_with_EB(
    en_vocab_size = len(en_word_index),
    input_shape = en_Psequences.shape,
    fr_vocab_size = len(fr_word_index),
)
GRU_EB_model.summary()

# Train with the last 20% of the data held out for validation.
result = GRU_EB_model.fit(
    en_Psequences,
    fr_Psequences,
    batch_size = 512,
    epochs = 100,
    validation_split = 0.2,
    verbose = 1,
)

In [1]:
def process_input(text, token, pad_size = None):
    """Encode ``text`` with an existing tokenizer and post-pad the result.

    Args:
        text: Collection of strings to encode (passed to
            ``token.texts_to_sequences``).
        token: Tokenizer-like object providing ``texts_to_sequences``. It is
            used as-is and is not re-fitted here.
        pad_size: Forwarded as ``maxlen`` to ``pad_sequences``.

    Returns:
        The padded array produced by ``pad_sequences``.
    """
    return pad_sequences(
        token.texts_to_sequences(text),
        maxlen = pad_size,
        padding = 'post',
    )

In [2]:
def en_fr_translation(logits, token):
    """Decode per-position class scores into a space-separated sentence.

    Args:
        logits: 2-D array-like of scores; the argmax is taken along axis 1,
            so each row yields one predicted id.
        token: Object exposing a ``word_index`` mapping of word -> integer id.

    Returns:
        str: The words for the predicted ids joined by single spaces. Ids
        that are absent from ``token.word_index`` (such as a padding id of 0)
        are skipped, so they no longer contribute stray or trailing spaces.
    """
    # Invert the vocabulary; `index` avoids shadowing the builtin `id`.
    index_word = {index: word for word, index in token.word_index.items()}
    predicted_ids = np.argmax(logits, 1)
    return ' '.join(index_word[pred] for pred in predicted_ids if pred in index_word)

In [None]:
input_sentence = input("Enter an English sentence :\n")

# The tokenizer expects a collection of texts, so wrap the single sentence.
input_text = [input_sentence]

# Pad to the same length as the training inputs rather than a hard-coded
# number, so this cell stays correct if the corpus or padding length changes.
Pinput_text = process_input(input_text, en_token, en_Psequences.shape[1])
print("\nTranslated French Sentence : \n", en_fr_translation(GRU_EB_model.predict(Pinput_text)[0], fr_token))