In [1]:
import json
import warnings

import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:


def _read_sentences(path):
    """Return the lines of the UTF-8 text file at ``path`` (``[]`` if it is blank)."""
    with open(path, 'r', encoding='utf-8') as file:
        content = file.read().strip()
    # ''.split('\n') yields [''], which would turn an empty file into a bogus
    # '' -> '' dictionary entry, so blank content maps to "no sentences".
    return content.split('\n') if content else []


def load_translations(english_file, french_file, hindi_file):
    """Build English->French and English->Hindi lookup tables from parallel files.

    Each file is read as UTF-8, stripped of leading/trailing whitespace and
    split on newlines; line *i* of every file is treated as the same sentence.

    Args:
        english_file: Path to the English sentences, one per line.
        french_file: Path to the French sentences, one per line.
        hindi_file: Path to the Hindi sentences, one per line.

    Returns:
        A tuple ``(english_to_french, english_to_hindi)`` of dictionaries keyed
        by the English line. If an English line occurs more than once, the
        last occurrence wins (``dict`` semantics).

    Warns:
        UserWarning: If the files do not all contain the same number of lines.
            Pairing still proceeds and stops at the shorter file of each pair,
            so the result is unchanged from the silent behaviour.
    """
    english_sentences = _read_sentences(english_file)
    french_sentences = _read_sentences(french_file)
    hindi_sentences = _read_sentences(hindi_file)

    line_counts = (len(english_sentences), len(french_sentences), len(hindi_sentences))
    if len(set(line_counts)) > 1:
        # zip() truncates silently; surface the mismatch so a misaligned
        # corpus does not go unnoticed.
        warnings.warn(
            "Translation files have different line counts "
            f"(english={line_counts[0]}, french={line_counts[1]}, hindi={line_counts[2]}); "
            "unmatched trailing lines are ignored.",
            stacklevel=2,
        )

    english_to_french = dict(zip(english_sentences, french_sentences))
    english_to_hindi = dict(zip(english_sentences, hindi_sentences))
    return english_to_french, english_to_hindi

def translate_to_french_and_hindi(sentence, english_to_french, english_to_hindi):
    """Print the French and Hindi dictionary translations of ``sentence``.

    Only inputs that are exactly 10 characters long are looked up; anything
    else prints an error and returns immediately. The lookup is an exact-key
    ``.get`` on each mapping. Nothing is returned; all results go to stdout.

    Args:
        sentence: The English text to look up.
        english_to_french: Mapping from English text to its French translation.
        english_to_hindi: Mapping from English text to its Hindi translation.
    """
    required_length = 10
    if len(sentence) != required_length:
        print("Error: Word length must be exactly 10 letters.")
        return

    in_french = english_to_french.get(sentence)
    in_hindi = english_to_hindi.get(sentence)

    # Diagnostic trace: shows the raw lookup results (``None`` when missing).
    print(f"Looking for translations of '{sentence}'...")
    print(f"Found French translation: {in_french}")
    print(f"Found Hindi translation: {in_hindi}\n")

    # Both translations must be truthy; a missing or empty one counts as not found.
    if not (in_french and in_hindi):
        print("Translation not found.")
        return

    print(f"English Sentence: {sentence}")
    print(f"French Translation --> {in_french}")
    print(f"Hindi Translation  --> {in_hindi}")

In [3]:


def create_tokenizer_and_vocab(sentences, file_path_prefix):
    """Fit a ``Tokenizer`` on ``sentences`` and save it and its vocabulary to disk.

    Two UTF-8 files are written (non-ASCII characters are kept unescaped):

    * ``{file_path_prefix}_tokenizer.json`` - the value of ``to_json()`` passed
      through the JSON encoder once more before being written.
    * ``{file_path_prefix}_vocab.json`` - the tokenizer's ``word_index``.

    Args:
        sentences: Texts used to fit the tokenizer.
        file_path_prefix: Prefix for the two output file names.

    Returns:
        A tuple ``(tokenizer, vocab)`` of the fitted tokenizer and its
        ``word_index``.
    """
    fitted = Tokenizer()
    fitted.fit_on_texts(sentences)

    tokenizer_path = f"{file_path_prefix}_tokenizer.json"
    with open(tokenizer_path, 'w', encoding='utf-8') as out:
        json.dump(fitted.to_json(), out, ensure_ascii=False)

    word_to_index = fitted.word_index
    vocab_path = f"{file_path_prefix}_vocab.json"
    with open(vocab_path, 'w', encoding='utf-8') as out:
        json.dump(word_to_index, out, ensure_ascii=False)

    return fitted, word_to_index

def create_tensorflow_model(english_sentences, french_sentences, hindi_sentences):
    """Tokenize the three corpora, pad them, and build a compiled Keras model.

    Side effects: ``create_tokenizer_and_vocab`` writes tokenizer/vocab JSON
    files with the prefixes ``english``, ``french`` and ``hindi`` (in that
    order), and the model summary is printed.

    Args:
        english_sentences: English texts (model input side).
        french_sentences: French texts.
        hindi_sentences: Hindi texts.

    Returns:
        A tuple ``(model, english_sequences, french_sequences,
        hindi_sequences)``; the three sequence arrays are post-padded to one
        shared length. The model is compiled but not trained here.

    NOTE(review): the final ``Dense`` layer emits one softmax vector per
    sentence, while the French sequences are padded to a full sequence
    length - confirm how the targets are meant to be fed before training.
    """
    # Fit one tokenizer per language; the Hindi vocabulary is not needed below.
    english_tokenizer, english_vocab = create_tokenizer_and_vocab(english_sentences, "english")
    french_tokenizer, french_vocab = create_tokenizer_and_vocab(french_sentences, "french")
    hindi_tokenizer, _ = create_tokenizer_and_vocab(hindi_sentences, "hindi")

    # Integer-encode every corpus, kept in English / French / Hindi order.
    encoded = [
        english_tokenizer.texts_to_sequences(english_sentences),
        french_tokenizer.texts_to_sequences(french_sentences),
        hindi_tokenizer.texts_to_sequences(hindi_sentences),
    ]

    # Longest sequence over all three languages; every corpus is padded to it.
    max_sequence_length = max(max(map(len, corpus)) for corpus in encoded)

    english_padded, french_padded, hindi_padded = (
        pad_sequences(corpus, maxlen=max_sequence_length, padding='post')
        for corpus in encoded
    )

    # One extra slot beyond the vocabulary size - presumably for the padding
    # index 0; confirm against the tokenizer's indexing.
    source_vocab_size = len(english_vocab) + 1
    target_vocab_size = len(french_vocab) + 1

    layers = tf.keras.layers
    model = tf.keras.Sequential([
        layers.Embedding(
            input_dim=source_vocab_size,
            output_dim=128,
            input_length=max_sequence_length,
        ),
        layers.Bidirectional(layers.LSTM(64, return_sequences=True)),
        layers.Bidirectional(layers.LSTM(64)),
        layers.Dense(64, activation='relu'),
        layers.Dense(target_vocab_size, activation='softmax'),  # French vocabulary head
    ])

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    model.summary()

    return model, english_padded, french_padded, hindi_padded





In [4]:
# Save the model
def save_model(model, model_file):
    """Write ``model`` to ``model_file`` via ``model.save`` and report the path.

    Args:
        model: Object exposing a ``save(path)`` method.
        model_file: Destination path handed to ``model.save``.
    """
    model.save(model_file)
    confirmation = f"Model saved to {model_file}"
    print(confirmation)

# Load the model
def load_model_from_file(model_file):
    """Load a saved model from ``model_file`` with ``load_model`` and return it.

    Args:
        model_file: Path of the saved model.

    Returns:
        Whatever ``load_model`` returns for that path.
    """
    restored = load_model(model_file)
    confirmation = f"Model loaded from {model_file}"
    print(confirmation)
    return restored


In [5]:
# Usage example: load the parallel corpora, then build the model from them.
english_file, french_file, hindi_file = (
    "data5/english.txt",
    "data5/french.txt",
    "data5/hindi.txt",
)
english_to_french, english_to_hindi = load_translations(
    english_file, french_file, hindi_file
)

# Sentence lists are derived from the lookup tables, so repeated English
# lines have already been collapsed to a single entry.
english_sentences = list(english_to_french)
french_sentences = list(english_to_french.values())
hindi_sentences = list(english_to_hindi.values())

model, english_sequences, french_sequences, hindi_sequences = create_tensorflow_model(
    english_sentences,
    french_sentences,
    hindi_sentences,
)



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 10, 128)           2675840   
                                                                 
 bidirectional (Bidirectiona  (None, 10, 128)          98816     
 l)                                                              
                                                                 
 bidirectional_1 (Bidirectio  (None, 128)              98816     
 nal)                                                            
                                                                 
 dense (Dense)               (None, 64)                8256      
                                                                 
 dense_1 (Dense)             (None, 16924)             1100060   
                                                                 
Total params: 3,981,788
Trainable params: 3,981,788
Non-

In [6]:
# Round-trip the model built above through disk (no training step appears
# in this part of the notebook).
model_file = "translation_model.h5"
save_model(model, model_file)
loaded_model = load_model_from_file(model_file)

# %% Example translation
# The lookup below uses only the two dictionaries; `loaded_model` is not passed in.
sentence = "Playground"
translate_to_french_and_hindi(
    sentence,
    english_to_french,
    english_to_hindi,
)

Model saved to translation_model.h5
Model loaded from translation_model.h5
Looking for translations of 'Playground'...
Found French translation: Terrain de jeu
Found Hindi translation: खेल का मैदान

English Sentence: Playground
French Translation --> Terrain de jeu
Hindi Translation  --> खेल का मैदान
