In [8]:
import datetime
import json
import warnings

import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [9]:


def _read_lines(path):
    """Return the newline-separated lines of a UTF-8 text file, or [] if it is empty."""
    with open(path, 'r', encoding='utf-8') as handle:
        text = handle.read().strip()
    # ''.split('\n') would give [''], which turns an empty file into one
    # phantom blank sentence; treat "no content" as "no lines" instead.
    return text.split('\n') if text else []


def load_translations(english_file, hindi_file):
    """Load a line-aligned English/Hindi corpus into a lookup dictionary.

    Line *i* of ``english_file`` is paired with line *i* of ``hindi_file``.
    Leading/trailing whitespace of each whole file is stripped before
    splitting; blank lines in the middle are kept so the alignment holds.

    Args:
        english_file: Path to the UTF-8 file of English sentences, one per line.
        hindi_file: Path to the UTF-8 file of Hindi sentences, one per line.

    Returns:
        dict mapping each English line to its Hindi line. Repeated English
        lines keep only their last pairing. If the files differ in length,
        only the first ``min(len)`` pairs are returned and a warning is issued.
    """
    english_sentences = _read_lines(english_file)
    hindi_sentences = _read_lines(hindi_file)

    # zip() stops at the shorter input without complaint; surface that so a
    # misaligned corpus is not silently truncated.
    if len(english_sentences) != len(hindi_sentences):
        warnings.warn(
            f"Line count mismatch: {len(english_sentences)} English vs "
            f"{len(hindi_sentences)} Hindi lines; extra lines are ignored.",
            stacklevel=2,
        )

    return dict(zip(english_sentences, hindi_sentences))

def _lookup_translation(sentence, english_to_hindi):
    """Return the Hindi entry for ``sentence``, tolerating case/whitespace differences."""
    # Lower-cased lookup first (the original behaviour), then the text as typed.
    for candidate in (sentence.lower(), sentence):
        if candidate in english_to_hindi:
            return english_to_hindi[candidate]

    # Dictionary keys keep whatever casing the corpus used, so fall back to a
    # linear case- and whitespace-insensitive scan before giving up.
    wanted = sentence.strip().casefold()
    for english, hindi in english_to_hindi.items():
        if isinstance(english, str) and english.strip().casefold() == wanted:
            return hindi
    return "Translation not found."


def translate_to_hindi(sentence, english_to_hindi, now=None):
    """Print the Hindi translation of ``sentence`` during the allowed time window.

    Every outcome is reported with ``print``; the function returns ``None``.

    Args:
        sentence: English word or sentence to translate.
        english_to_hindi: Mapping from English text to Hindi text.
        now: Optional naive ``datetime.time`` (or ``datetime.datetime``) to use
            as the current time. Defaults to the system clock; mainly useful
            for testing outside the 21:00-22:00 window.

    Behaviour:
        * Outside 21:00-22:00 (inclusive) a notice is printed and nothing else.
        * Input shorter than two characters prints an error.
        * Otherwise the translation, or "Translation not found.", is printed.
    """
    if now is None:
        current_time = datetime.datetime.now().time()
    elif isinstance(now, datetime.datetime):
        current_time = now.time()
    else:
        current_time = now

    start_time = datetime.time(21, 0)
    end_time = datetime.time(22, 0)

    if not (start_time <= current_time <= end_time):
        print("Translation feature is available only between 9 PM and 10 PM.")
        return

    if len(sentence) < 2:
        print("Error: Word length must be at least 2 letters.")
        return

    # NOTE(review): a vowel rule used to sit here ("starts with a vowel AND
    # outside the window"), but the window guard above had already returned in
    # that case, so it could never fire. It is dropped rather than guessed at;
    # if vowel-initial words were meant to be the only time-gated ones, the
    # guard above is what needs narrowing - confirm the intended rule.

    hindi_translation = _lookup_translation(sentence, english_to_hindi)
    print(f"English Sentence: {sentence}")
    print(f"Hindi Translation --> {hindi_translation}")

In [10]:


def create_tokenizer_and_vocab(sentences, file_path_prefix):
    """Fit a Keras ``Tokenizer`` on ``sentences`` and persist it next to its vocabulary.

    Two UTF-8 files are written:

    * ``<file_path_prefix>_tokenizer.json`` - the string returned by
      ``Tokenizer.to_json()``, JSON-encoded once more. A reader therefore has
      to ``json.load`` the file to get back the string that
      ``tokenizer_from_json`` expects.
    * ``<file_path_prefix>_vocab.json`` - the tokenizer's ``word_index`` dict.

    Args:
        sentences: Texts to fit the tokenizer on.
        file_path_prefix: Path prefix shared by both output files.

    Returns:
        ``(tokenizer, vocab)`` where ``vocab`` is ``tokenizer.word_index``.
    """
    fitted = Tokenizer()
    fitted.fit_on_texts(sentences)

    # ensure_ascii=False keeps non-ASCII text readable in the files.
    with open(f"{file_path_prefix}_tokenizer.json", 'w', encoding='utf-8') as tokenizer_out:
        json.dump(fitted.to_json(), tokenizer_out, ensure_ascii=False)

    word_to_id = fitted.word_index
    with open(f"{file_path_prefix}_vocab.json", 'w', encoding='utf-8') as vocab_out:
        json.dump(word_to_id, vocab_out, ensure_ascii=False)

    return fitted, word_to_id

def create_tensorflow_model(english_sentences, hindi_sentences):
    """Tokenize both corpora and compile (without training) a Keras model.

    One tokenizer is fitted per language via ``create_tokenizer_and_vocab``,
    which also writes ``english_*.json`` and ``hindi_*.json`` files. Both
    corpora are turned into integer sequences and post-padded to the length
    of the longest sequence in either language. ``model.summary()`` is
    printed as a side effect.

    Args:
        english_sentences: Source-language texts.
        hindi_sentences: Target-language texts.

    Returns:
        ``(model, english_sequences, hindi_sequences)`` - the compiled model
        and the two padded id arrays.

    NOTE(review): the second LSTM does not return sequences, so the network
    emits one softmax vector per input sentence, while ``hindi_sequences``
    holds ``max_sequence_length`` ids per sentence. Confirm how targets are
    meant to be supplied before calling ``fit``.
    """
    layers = tf.keras.layers

    en_tokenizer, en_vocab = create_tokenizer_and_vocab(english_sentences, "english")
    hi_tokenizer, hi_vocab = create_tokenizer_and_vocab(hindi_sentences, "hindi")

    en_ids = en_tokenizer.texts_to_sequences(english_sentences)
    hi_ids = hi_tokenizer.texts_to_sequences(hindi_sentences)

    # A single padded length is shared by both languages.
    longest_en = max(map(len, en_ids))
    longest_hi = max(map(len, hi_ids))
    padded_len = max(longest_en, longest_hi)

    en_padded = pad_sequences(en_ids, maxlen=padded_len, padding='post')
    hi_padded = pad_sequences(hi_ids, maxlen=padded_len, padding='post')

    # Vocabulary sizes are len(word_index) + 1: tokenizer ids start at 1 and
    # id 0 is the padding value.
    stack = [
        layers.Embedding(input_dim=len(en_vocab) + 1, output_dim=128, input_length=padded_len),
        layers.Bidirectional(layers.LSTM(64, return_sequences=True)),
        layers.Bidirectional(layers.LSTM(64)),
        layers.Dense(64, activation='relu'),
        layers.Dense(len(hi_vocab) + 1, activation='softmax'),
    ]
    network = tf.keras.Sequential(stack)

    network.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    network.summary()
    return network, en_padded, hi_padded




In [11]:
# Save the model
def save_model(model, model_file):
    """Write ``model`` to ``model_file`` via ``model.save`` and print a confirmation.

    Args:
        model: Object exposing ``save(path)``.
        model_file: Destination path handed to ``model.save``.
    """
    model.save(model_file)
    confirmation = f"Model saved to {model_file}"
    print(confirmation)

# Load the model
def load_model_from_file(model_file):
    """Load a saved Keras model from ``model_file`` and print a confirmation.

    Args:
        model_file: Path passed to ``load_model``.

    Returns:
        The object returned by ``load_model``.
    """
    restored = load_model(model_file)
    print(f"Model loaded from {model_file}")
    return restored

# Usage example: load the parallel corpus, build the model, round-trip it
# through disk, then look up a single word.
# NOTE(review): both paths are absolute locations on one Windows machine;
# point them at your own copy of the data before running.
english_file = "C:\\Users\\Jay\\Machine_Translation-main\\assignment--7-nullclass--main\\data7\\english.txt"
hindi_file = "C:\\Users\\Jay\\Machine_Translation-main\\assignment--7-nullclass--main\\data7\\hindi.txt"
english_to_hindi = load_translations(english_file, hindi_file)
# The dictionary's keys/values double as the two corpora, so repeated English
# lines have already collapsed to a single entry here.
english_sentences = list(english_to_hindi.keys())
hindi_sentences = list(english_to_hindi.values())
# Also writes the tokenizer/vocab JSON files and prints the model summary.
model, english_sequences, hindi_sequences = create_tensorflow_model(english_sentences, hindi_sentences)
# No fit() call appears in this cell, so what gets saved is the compiled but
# untrained model.
model_file = "translation_model.h5"
save_model(model, model_file)
# loaded_model is not used again in this cell; the lookup below goes through
# the dictionary, not the network.
loaded_model = load_model_from_file(model_file)
sentence = "Playground"
# Prints a translation only when run between 21:00 and 22:00 by the system
# clock; at any other time it prints an availability notice instead.
translate_to_hindi(sentence, english_to_hindi)

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 10, 128)           2675840   
                                                                 
 bidirectional_8 (Bidirectio  (None, 10, 128)          98816     
 nal)                                                            
                                                                 
 bidirectional_9 (Bidirectio  (None, 128)              98816     
 nal)                                                            
                                                                 
 dense_8 (Dense)             (None, 64)                8256      
                                                                 
 dense_9 (Dense)             (None, 14003)             910195    
                                                                 
Total params: 3,791,923
Trainable params: 3,791,923
No