In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense

# Toy parallel corpus: english_sentences[i] is paired with hindi_sentences[i].
english_sentences = ["Hello", "How are you?", "Translate this sentence"]
hindi_sentences = ["नमस्ते", "तुम कैसे हो?", "इस वाक्य का अनुवाद करें"]

# Character-level vocabularies mapping each distinct character to an integer id.
# Ids start at 1, leaving 0 free (sentences_to_sequences pads with zeros).
# The characters are sorted before numbering: iterating a set of strings
# directly yields an order that can change from one interpreter run to the
# next (string hash randomization), which would give every run a different
# char -> id mapping and make a saved model unusable with a rebuilt tokenizer.
# Joining with ' ' guarantees the space character is always in the vocabulary.
english_tokenizer = {
    char: idx
    for idx, char in enumerate(sorted(set(' '.join(english_sentences))), start=1)
}
hindi_tokenizer = {
    char: idx
    for idx, char in enumerate(sorted(set(' '.join(hindi_sentences))), start=1)
}


# Longest sentence on each side, measured in Unicode code points (what len()
# counts for str); used as the padded sequence length.
max_english_sequence_length = max(len(sentence) for sentence in english_sentences)
max_hindi_sequence_length = max(len(sentence) for sentence in hindi_sentences)


def sentences_to_sequences(sentences, tokenizer, max_sequence_length):
    """Encode sentences as a zero-padded 2-D array of character ids.

    Args:
        sentences: Sequence of strings to encode, one output row per string.
        tokenizer: Mapping from a single character to its numeric id.
        max_sequence_length: Number of columns in the result.

    Returns:
        A float64 array of shape ``(len(sentences), max_sequence_length)``.
        Row ``i`` holds the ids of ``sentences[i]`` from column 0 onward;
        the remaining columns stay 0.

    Raises:
        KeyError: If a character is missing from ``tokenizer``.
        IndexError: If a sentence has more than ``max_sequence_length``
            characters.
    """
    encoded = np.zeros((len(sentences), max_sequence_length))
    for row, text in enumerate(sentences):
        # The generator is lazy, so each character is looked up immediately
        # before its id is stored.
        char_ids = (tokenizer[symbol] for symbol in text)
        for col, char_id in enumerate(char_ids):
            encoded[row, col] = char_id
    return encoded


# Encode both sides of the corpus as zero-padded matrices of character ids,
# one row per sentence.
X_train = sentences_to_sequences(english_sentences, english_tokenizer, max_english_sequence_length)
y_train = sentences_to_sequences(hindi_sentences, hindi_tokenizer, max_hindi_sequence_length)

# Teacher forcing: at timestep t the decoder is fed the target character of
# timestep t - 1 and is trained to predict the character at t. Feeding y_train
# itself as the decoder input would show the network, at every step, exactly
# the value it is being scored on, so it would only learn to copy its input.
# Column 0 keeps id 0, which therefore doubles as the start-of-sequence marker.
decoder_input_train = np.zeros_like(y_train)
decoder_input_train[:, 1:] = y_train[:, :-1]

# Add a trailing feature axis of size 1 to match the Input shapes declared
# below: (samples, timesteps, 1), each timestep carrying one scalar id.
X_train = X_train.reshape((len(english_sentences), max_english_sequence_length, 1))
y_train = y_train.reshape((len(hindi_sentences), max_hindi_sequence_length, 1))
decoder_input_train = decoder_input_train.reshape(
    (len(hindi_sentences), max_hindi_sequence_length, 1)
)


# Size of the LSTM state shared by the encoder and the decoder.
latent_dim = 256


# Encoder: reads the English sequence; only its final hidden and cell states
# are kept (the per-call output is unused).
encoder_inputs = Input(shape=(max_english_sequence_length, 1))
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_inputs)
encoder_states = [state_h, state_c]


# Decoder: starts from the encoder's final states and emits one output per
# timestep, which the Dense softmax turns into a distribution over character
# ids. The "+ 1" makes room for id 0, which is not a key of hindi_tokenizer.
decoder_inputs = Input(shape=(max_hindi_sequence_length, 1))
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
decoder_dense = Dense(len(hindi_tokenizer) + 1, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)


model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# One-hot targets for categorical_crossentropy, with the same number of
# classes as the Dense layer above.
from tensorflow.keras.utils import to_categorical
y_train_one_hot = to_categorical(y_train, num_classes=len(hindi_tokenizer) + 1)


# Inputs: English ids for the encoder, right-shifted Hindi ids for the decoder.
# Targets: the unshifted Hindi ids, one-hot encoded.
model.fit([X_train, decoder_input_train], y_train_one_hot, epochs=10, batch_size=64)

model.save('translation_model.h5')


Epoch 1/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 4s 4s/step - accuracy: 0.0290 - loss: 3.1471
Epoch 2/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 183ms/step - accuracy: 0.4493 - loss: 2.8680
Epoch 3/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 185ms/step - accuracy: 0.4348 - loss: 2.6263
Epoch 4/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 187ms/step - accuracy: 0.4348 - loss: 2.3962
Epoch 5/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 209ms/step - accuracy: 0.4493 - loss: 2.1581
Epoch 6/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 197ms/step - accuracy: 0.4638 - loss: 1.9302
Epoch 7/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 203ms/step - accuracy: 0.4783 - loss: 1.7948
Epoch 8/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 244ms/step - accuracy: 0.5072 - loss: 1.7160
Epoch 9/10
1/1 ━━━━━━━━━━━━━━━━━━━━

