In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Input, Embedding, Dense, Attention
from tensorflow.keras.models import Model
from tensorflow.keras.utils import pad_sequences
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Concatenate


In [2]:
# Example data
english_sentences = ['hello', 'how are you', 'good morning']
french_sentences = ['bonjour', 'comment ça va', 'bonjour']



In [3]:
def tokenize(sentences):
    tokenizer  = tf.keras.preprocessing.text.Tokenizer()
    tokenizer.fit_on_texts(sentences)
    return tokenizer, tokenizer.texts_to_sequences(sentences)

In [4]:
eng_tokenizer, eng_sequences = tokenize(english_sentences)

In [5]:
eng_tokenizer

<keras.src.legacy.preprocessing.text.Tokenizer at 0x2389279a5c0>

In [6]:
eng_sequences

[[1], [2, 3, 4], [5, 6]]

In [7]:
fr_tokenizer, fr_sequences = tokenize(french_sentences)

In [8]:
fr_sequences

[[1], [2, 3, 4], [1]]

In [9]:
max_eng_len = max(len(seq) for seq in eng_sequences)
max_eng_len

3

In [10]:
max_fr_len = max(len(seq) for seq in fr_sequences)
max_fr_len

3

In [11]:
# Paddding
eng_sequences = pad_sequences(eng_sequences, maxlen=max_eng_len, padding='post')
fr_sequences = pad_sequences(fr_sequences, maxlen=max_fr_len, padding='post')

In [12]:
eng_sequences

array([[1, 0, 0],
       [2, 3, 4],
       [5, 6, 0]])

In [13]:
fr_sequences

array([[1, 0, 0],
       [2, 3, 4],
       [1, 0, 0]])

In [14]:
# Vocabulary sizes
eng_vocab_size = len(eng_tokenizer.word_index)+1
fr_vocab_size = len(fr_tokenizer.word_index)+1

In [15]:
eng_tokenizer.word_index

{'hello': 1, 'how': 2, 'are': 3, 'you': 4, 'good': 5, 'morning': 6}

### Build Encoder-Decoder Model with Attention

In [16]:
# Hyperparameters
embedding_dim = 64
hidden_dim = 128

# Encoder
encoder_inputs = Input(shape=(max_eng_len,))
encoder_embedding = Embedding(input_dim=eng_vocab_size, output_dim=embedding_dim)(encoder_inputs)
encoder_lstm = LSTM(hidden_dim, return_sequences=True, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h, state_c]


# Decoder
decoder_inputs = Input(shape=(max_fr_len,))
decoder_embedding = Embedding(input_dim=fr_vocab_size, output_dim=embedding_dim)(decoder_inputs)
decoder_lstm = LSTM(units=hidden_dim, return_sequences=True, return_state=True )
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)

# Attention layer
attention=Attention()
context_vector = attention([decoder_outputs, encoder_outputs])

# Concatenate attention output with decoder outputs
concat_outputs = Concatenate(axis=-1)([decoder_outputs, context_vector])

# Dense layer
decoder_dense = Dense(units=fr_vocab_size, activation='softmax')
decoder_outputs = decoder_dense(concat_outputs)

# Model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [75]:
fr_vocab_size

5

### Prepare Data for Training

In [17]:
fr_sequences

array([[1, 0, 0],
       [2, 3, 4],
       [1, 0, 0]])

In [18]:
eng_sequences

array([[1, 0, 0],
       [2, 3, 4],
       [5, 6, 0]])

In [19]:
# Target data for decoder (shifts outputs for teacher forcing)
decoder_input_data = fr_sequences[:,:-1]  # Remove last word
decoder_input_data = pad_sequences(decoder_input_data, maxlen=max_fr_len, padding='post')
decoder_input_data

array([[1, 0, 0],
       [2, 3, 0],
       [1, 0, 0]])

In [20]:
decoder_target_data = fr_sequences[:,1:] #Remove first word
decoder_target_data = pad_sequences(decoder_target_data, maxlen=max_fr_len, padding='post')
decoder_input_data

array([[1, 0, 0],
       [2, 3, 0],
       [1, 0, 0]])

In [21]:
# # Reshape target data to match sparse categorical cross-entropy
# decoder_target_data = np.expand_dims(decoder_target_data, -1)
# decoder_target_data

In [22]:
print("Eng sequences shape:", eng_sequences.shape)
print("Decoder input shape:", decoder_input_data.shape)
print("Decoder target shape:", decoder_target_data.shape)

Eng sequences shape: (3, 3)
Decoder input shape: (3, 3)
Decoder target shape: (3, 3)


In [23]:
X_train, X_test = train_test_split(eng_sequences, test_size=0.2)
decoder_input_train, decoder_input_test = train_test_split(decoder_input_data, test_size=0.2)
decoder_target_train, decoder_target_test = train_test_split(decoder_target_data, test_size=0.2)


In [24]:
print(X_train.shape, decoder_input_train.shape, decoder_target_train.shape)


(2, 3) (2, 3) (2, 3)


In [25]:
model.fit(
    [X_train, decoder_input_train],
    decoder_target_train,
    batch_size=32,
    epochs=50,
    validation_split=0.2
)

Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10s/step - accuracy: 0.3333 - loss: 1.6073 - val_accuracy: 0.6667 - val_loss: 1.5917
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step - accuracy: 0.6667 - loss: 1.5942 - val_accuracy: 0.6667 - val_loss: 1.5810
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step - accuracy: 0.6667 - loss: 1.5809 - val_accuracy: 0.6667 - val_loss: 1.5699
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step - accuracy: 0.6667 - loss: 1.5673 - val_accuracy: 0.6667 - val_loss: 1.5581
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 150ms/step - accuracy: 0.6667 - loss: 1.5531 - val_accuracy: 0.6667 - val_loss: 1.5454
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step - accuracy: 0.6667 - loss: 1.5379 - val_accuracy: 0.6667 - val_loss: 1.5316
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x23893a94e50>

In [None]:
model.save_weights('model_weights.weights.h5')  # Save weights after training


In [None]:
from tensorflow.keras.layers import Input, Embedding, LSTM, Attention, Concatenate, Dense
from tensorflow.keras.models import Model

# Encoder Model
encoder_input = Input(shape=(max_eng_len,), name="encoder_input")
encoder_embedding = Embedding(input_dim=eng_vocab_size, output_dim=64, name="embedding_encoder")(encoder_input)
encoder_lstm = LSTM(128, return_state=True, return_sequences=True, name="encoder_lstm")
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)

# Decoder Model
decoder_input = Input(shape=(max_fr_len,), name="decoder_input")
decoder_embedding = Embedding(input_dim=fr_vocab_size, output_dim=64, name="embedding_decoder")(decoder_input)
decoder_lstm = LSTM(128, return_sequences=True, return_state=True, name="decoder_lstm")
decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(
    decoder_embedding, initial_state=[state_h, state_c]
)

# Attention Layer
attention = Attention(name="attention_layer")
context_vector = attention([decoder_outputs, encoder_outputs])

# Concatenate context vector and decoder outputs
concat = Concatenate(axis=-1, name="concat_layer")([decoder_outputs, context_vector])

# Dense layer for output
dense = Dense(5, activation='softmax', name="output_layer")(concat)

# Define the inference model
inference_model = Model(
    inputs=[encoder_input, decoder_input],
    outputs=[dense, decoder_state_h, decoder_state_c]
)

inference_model.summary()


In [78]:
inference_model.load_weights(r'model_weights.weights.h5')


In [32]:
def preprocess_sentence(sentence):
    sentence_seq = [eng_tokenizer.texts_to_sequences([sentence])[0]]
    sentence_seq = pad_sequences(sentence_seq, maxlen=max_eng_len, padding='post')
    return sentence_seq

# Example input sentence
input_sentence = "Hello, how are you?"
input_seq = preprocess_sentence(input_sentence)
input_seq

array([[2, 3, 4]])

In [53]:
encoder_status_value = encoder_model.predict(input_seq)
encoder_status_value

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step


[array([[ 0.01818199,  0.06864583, -0.09847339, -0.04110701,  0.0719709 ,
         -0.07051085,  0.07920869,  0.07060644, -0.04492887,  0.0645945 ,
         -0.05822396,  0.08581714,  0.05485027, -0.07997734, -0.07035441,
          0.07867626,  0.09376277,  0.05891528,  0.05867816,  0.03451716,
          0.05362396, -0.029333  , -0.08713233,  0.06484129,  0.03081331,
          0.08502707,  0.09150185,  0.09163052,  0.07750283, -0.00342383,
          0.00996422, -0.08636239,  0.08020256, -0.07914649,  0.08136676,
         -0.05865603,  0.07157349,  0.08025861, -0.06090285, -0.05491629,
          0.08774655,  0.07433639,  0.05563157, -0.08966966,  0.08677361,
          0.05858992, -0.07849213, -0.08618442,  0.03185561, -0.08210927,
          0.02681711, -0.07851245,  0.04491309, -0.08157173,  0.06552698,
          0.06219072, -0.04296044, -0.01064965,  0.07485264, -0.07121295,
         -0.00065565,  0.04698627,  0.02403218,  0.08630774, -0.07575972,
         -0.02263746,  0.08299419,  0.

In [40]:
start_token = fr_tokenizer.word_index['bonjour']
target_seq = np.array([[start_token]])
target_seq.shape

(1, 1)

In [60]:
decoder_model

<Functional name=functional_5, built=True>

In [59]:
decoder_inputs = [target_seq] + encoder_status_value

In [62]:
decoded_sentence = ""
for _ in range(max_fr_len):
    output_token, h, c = decoder_model.predict(decoder_inputs)
    # Get the word with highest probability
    sampled_token_index = np.argmax(output_tokens[0,-1,:])
    sampled_word = fr_tokenizer.index_word.get(sampled_token_index, '')
    
    if sample_word == '<end>':
        break
    decoded_sentence += " " + sample_word
    target_seq = np.array([[sampled_token_index]])
    encoder_state_values = [h, c]

print("Decoded sentence :", decoded_sentence)


TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType'