In [28]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Layer,Lambda,Concatenate,RepeatVector
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [29]:
english_sentences = [
    "hello",
    "how are you"
]

hindi_sentences = [
    "नमस्ते",
    "आप कैसे हैं"
]

In [30]:
# Add <start> and <end> tokens for decoder training
hindi_sentences = ["<start> " + sentence + " <end>" for sentence in hindi_sentences]

In [31]:
# ===============================
# 2. Tokenization and Padding
# ===============================
eng_tokenizer = Tokenizer(filters='')
eng_tokenizer.fit_on_texts(english_sentences)
eng_sequences = eng_tokenizer.texts_to_sequences(english_sentences)
max_eng_len = max(len(seq) for seq in eng_sequences)
eng_padded = pad_sequences(eng_sequences, maxlen=max_eng_len, padding='post')

hin_tokenizer = Tokenizer(filters='')
hin_tokenizer.fit_on_texts(hindi_sentences)
hin_sequences = hin_tokenizer.texts_to_sequences(hindi_sentences)
max_hin_len = max(len(seq) for seq in hin_sequences)
hin_padded = pad_sequences(hin_sequences, maxlen=max_hin_len, padding='post')

# Prepare decoder input and target sequences
decoder_input_data = hin_padded[:, :-1]
decoder_target_data = hin_padded[:, 1:]
## decoder_target_data = np.expand_dims(decoder_target_data, -1)

eng_vocab_size = len(eng_tokenizer.word_index) + 1
hin_vocab_size = len(hin_tokenizer.word_index) + 1

In [32]:
# ===============================
# 3. Bahdanau Attention Layer
# ===============================
class BahdanauAttention(Layer):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)

    def call(self, query, values):
        # query: (batch_size, hidden_size)
        # values: (batch_size, seq_len, hidden_size)
        query_with_time_axis = tf.expand_dims(query, 1)
        score = self.V(tf.nn.tanh(self.W1(values) + self.W2(query_with_time_axis)))
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights

In [33]:
def repeat_vector(x):
    # x shape: (batch_size, 1, latent_dim)
    # tile it to (batch_size, time_steps, latent_dim)
    time_steps = tf.shape(decoder_outputs)[1]
    return tf.tile(x, [1, time_steps, 1])

In [38]:
# ===============================
# 4. Model Architecture
# ===============================
latent_dim = 256
max_decoder_seq_len = decoder_input_data.shape[1]

# Encoder
encoder_inputs = Input(shape=(None,))
enc_emb = Embedding(eng_vocab_size, latent_dim, mask_zero=True)(encoder_inputs)
encoder_outputs, state_h, state_c = LSTM(latent_dim, return_sequences=True, return_state=True)(enc_emb)

# Decoder
decoder_inputs = Input(shape=(None,))
dec_emb_layer = Embedding(hin_vocab_size, latent_dim, mask_zero=True)
dec_emb = dec_emb_layer(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(dec_emb, initial_state=[state_h, state_c])

# Attention
attention = BahdanauAttention(latent_dim)
context_vector, attention_weights = attention(state_h, encoder_outputs)
## context_vector = Lambda(lambda x: tf.expand_dims(x, axis=1))(context_vector)
context_vector_repeated = RepeatVector(max_decoder_seq_len)(context_vector)
decoder_combined_context = Concatenate(axis=-1)([context_vector_repeated, decoder_outputs])

# Final Dense layer
decoder_dense = Dense(hin_vocab_size, activation='softmax')
output = decoder_dense(decoder_combined_context)

# Full Model
model = Model([encoder_inputs, decoder_inputs], output)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
model.summary()

In [39]:
# ===============================
# 5. Training
# ===============================
model.fit(
    [eng_padded, decoder_input_data],
    decoder_target_data,
    batch_size=16,
    epochs=300,
    verbose=1
)

Epoch 1/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - loss: 1.9459
Epoch 2/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - loss: 1.9194
Epoch 3/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - loss: 1.8924
Epoch 4/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - loss: 1.8637
Epoch 5/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step - loss: 1.8325
Epoch 6/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step - loss: 1.7974
Epoch 7/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - loss: 1.7576
Epoch 8/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 1.7117
Epoch 9/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - loss: 1.6587
Epoch 10/300
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - loss: 1.5974
Epoch 11/30

<keras.src.callbacks.history.History at 0x1f4afbfe890>

In [46]:
# ===============================
# 6. Inference Setup
# ===============================

# Encoder model
encoder_model = Model(encoder_inputs, [encoder_outputs, state_h, state_c])

# Decoder model
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_hidden_state_input = Input(shape=(max_eng_len, latent_dim))

# Embedding
dec_emb2 = dec_emb_layer(decoder_inputs)
# Decoder LSTM
decoder_outputs2, state_h2, state_c2 = decoder_lstm(dec_emb2, initial_state=[decoder_state_input_h, decoder_state_input_c])
# Attention
context_vector2, attention_weights2 = attention(state_h2, decoder_hidden_state_input)
# Expand and concatenate (wrapped in Lambda / Concatenate)
context_vector2 = Lambda(lambda x: tf.expand_dims(x, 1))(context_vector2)
decoder_combined_context2 = Concatenate(axis=-1)([context_vector2, decoder_outputs2])
# Final output
decoder_outputs2 = decoder_dense(decoder_combined_context2)

decoder_model = Model(
    [decoder_inputs, decoder_hidden_state_input, decoder_state_input_h, decoder_state_input_c],
    [decoder_outputs2, state_h2, state_c2]
)

In [42]:
# ===============================
# 7. Translate Function
# ===============================
reverse_hin_index = {i: w for w, i in hin_tokenizer.word_index.items()}

def translate(sentence):
    seq = eng_tokenizer.texts_to_sequences([sentence.lower()])
    seq = pad_sequences(seq, maxlen=max_eng_len, padding='post')
    enc_outs, enc_h, enc_c = encoder_model.predict(seq)

    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = hin_tokenizer.word_index['<start>']

    stop = False
    decoded = []
    while not stop:
        output_tokens, dec_h, dec_c = decoder_model.predict([target_seq, enc_outs, enc_h, enc_c])
        token_index = np.argmax(output_tokens[0, -1, :])
        token = reverse_hin_index.get(token_index, '')
        if token == '<end>' or len(decoded) >= max_hin_len:
            stop = True
        else:
            decoded.append(token)
        target_seq[0, 0] = token_index
        enc_h, enc_c = dec_h, dec_c

    return ' '.join(decoded)

In [47]:
print("English:", "how are you")
print("Hindi:", translate("how are you"))

English: how are you
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
Hindi: आप कैसे हैं
