In [2]:
import unicodedata

import re


# Convert the unicode sequence to ascii
def unicode_to_ascii(s):

  # Normalize the unicode string and remove the non-spacking mark
  return ''.join(c for c in unicodedata.normalize('NFD', s)
      if unicodedata.category(c) != 'Mn')

# Preprocess the sequence
def preprocess_sentence(w):

  # Clean the sequence
  w = unicode_to_ascii(w.lower().strip())
  
  # Create a space between a word and the punctuation following it also place a space between the punctuation and the following word. Note that punctuation also includes | 

  w = re.sub(r"([?.!।])", r" \1 ", w)
  
  # Add a start and stop token to detect the start and end of the sequence
  w = '<start> ' + w + ' <end>'
  return w

In [18]:
import io

# Create the Dataset
def create_dataset(path):
  lines = io.open(path, encoding='UTF-8').read().strip().split('\n')

  # Loop through lines (sequences) and extract the English and French sequences. Store them as a word-pair
  word_pairs = [[preprocess_sentence(w) for w in l.split('\t', 2)[:-1]]  for l in lines]
  return zip(*word_pairs)

In [19]:
path_to_file='ben.txt'

In [20]:
lines = io.open(path_to_file, encoding='UTF-8').read().strip().split('\n')
print(lines[0])
print(preprocess_sentence(lines[0].split('\t', 2)[0]))
print(preprocess_sentence(lines[0].split('\t', 2)[1]))

Go.	যাও।	CC-BY 2.0 (France) Attribution: tatoeba.org #2877272 (CM) & #5545004 (tanay)
<start> go .  <end>
<start> যাও ।  <end>


In [21]:
en, fra = create_dataset(path_to_file)
print(en[0])
print(fra[0])

<start> go .  <end>
<start> যাও ।  <end>


In [22]:
import tensorflow as tf

# Convert sequences to tokenizers
def tokenize(lang):
  lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(
      filters='')
  
  # Convert sequences into internal vocab
  lang_tokenizer.fit_on_texts(lang)

  # Convert internal vocab to numbers
  tensor = lang_tokenizer.texts_to_sequences(lang)

  # Pad the tensors to assign equal length to all the sequences
  tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor,
                                                         padding='post')

  return tensor, lang_tokenizer

2024-05-16 12:38:23.039117: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [26]:
# Load the dataset
def load_dataset(path, num_examples=None):
 
  # Create dataset (targ_lan = English, inp_lang = French)
  inp_lang,targ_lang = create_dataset(path)

  # Tokenize the sequences
  input_tensor, inp_lang_tokenizer = tokenize(inp_lang)
  target_tensor, targ_lang_tokenizer = tokenize(targ_lang)

  return input_tensor, target_tensor, inp_lang_tokenizer, targ_lang_tokenizer

In [27]:
# Consider 50k examples
num_examples = 50000
input_tensor, target_tensor, inp_lang, targ_lang = load_dataset(path_to_file)

# Calculate max_length of the target tensors
max_length_targ, max_length_inp = target_tensor.shape[1], input_tensor.shape[1]

In [51]:
print(max_length_targ, max_length_inp)

21 23


In [28]:
print(input_tensor[0], target_tensor.shape)

[ 1 33  3  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0] (6509, 21)


In [29]:
from sklearn.model_selection import train_test_split

# Create training and validation sets using an 80/20 split
input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(input_tensor, target_tensor, test_size=0.2)

print(len(input_tensor_train), len(target_tensor_train), len(input_tensor_val), len(target_tensor_val))

5207 5207 1302 1302


In [30]:
print(input_tensor_train)
print(inp_lang)

[[   1   22   49 ...    0    0    0]
 [   1   84   10 ...    0    0    0]
 [   1    4  190 ...    0    0    0]
 ...
 [   1 1450 2625 ...    0    0    0]
 [   1    7   10 ...    0    0    0]
 [   1  165 1310 ...    0    0    0]]
<keras.src.legacy.preprocessing.text.Tokenizer object at 0x165824890>


In [31]:
# Show the mapping b/w word index and language tokenizer
def convert(lang, tensor):
  for t in tensor:
    if t != 0:
      print ("%d ----> %s" % (t, lang.index_word[t]))
      
print ("Input Language; index to word mapping")
convert(inp_lang, input_tensor_train[0])
print ()
print ("Target Language; index to word mapping")
convert(targ_lang, target_tensor_train[0])

Input Language; index to word mapping
1 ----> <start>
22 ----> what
49 ----> time
63 ----> does
9 ----> the
167 ----> next
218 ----> train
99 ----> leave
38 ----> for
894 ----> tokyo
5 ----> ?
2 ----> <end>

Target Language; index to word mapping
1 ----> <start>
1984 ----> টোকিওর
34 ----> জনয
258 ----> পরের
391 ----> টরেনটা
153 ----> কটার
43 ----> সময
450 ----> ছাডে
5 ----> ?
2 ----> <end>


In [32]:
BUFFER_SIZE = len(input_tensor_train)
BATCH_SIZE = 64
steps_per_epoch = len(input_tensor_train)//BATCH_SIZE
embedding_dim = 256
units = 1024
vocab_inp_size = len(inp_lang.word_index) + 1
vocab_tar_size = len(targ_lang.word_index) + 1

In [33]:
dataset = tf.data.Dataset.from_tensor_slices((input_tensor_train, target_tensor_train)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

In [34]:
# Size of input and target batches
example_input_batch, example_target_batch = next(iter(dataset))
example_input_batch.shape, example_target_batch.shape

(TensorShape([64, 23]), TensorShape([64, 21]))

In [35]:
import tensorflow as tf

class Encoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
        super(Encoder, self).__init__()
        self.batch_sz = batch_sz
        self.enc_units = enc_units

        # Embedding layer
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)

        # LSTM Layer
        self.lstm = tf.keras.layers.LSTM(self.enc_units,
                                         return_sequences=True,
                                         return_state=True,
                                         recurrent_initializer='glorot_uniform')

    # Encoder network comprises an Embedding layer followed by an LSTM layer
    def call(self, x, hidden):
        x = self.embedding(x)
        output, state_h, state_c = self.lstm(x, initial_state=hidden)
        state = [state_h, state_c]
        return output, state

    # To initialize the hidden state
    def initialize_hidden_state(self):
        return [tf.zeros((self.batch_sz, self.enc_units)), tf.zeros((self.batch_sz, self.enc_units))]


In [36]:
encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)

sample_hidden = encoder.initialize_hidden_state()
sample_output, sample_hidden = encoder(example_input_batch, sample_hidden)

print ('Encoder output shape: (batch size, sequence length, units) {}'.format(sample_output.shape))
print ('Encoder Hidden state shape: (batch size, units) {}'.format(sample_hidden[0].shape))
print ('Encoder Cell state shape: (batch size, units) {}'.format(sample_hidden[1].shape))


Encoder output shape: (batch size, sequence length, units) (64, 23, 1024)
Encoder Hidden state shape: (batch size, units) (64, 1024)
Encoder Cell state shape: (batch size, units) (64, 1024)


In [37]:
import tensorflow as tf

class PayAttention(tf.keras.layers.Layer):
    def __init__(self, units, length):
        self.units=units
        self.length=length
        super(PayAttention, self).__init__() #Call initializer of the superclass

    def build(self, input_shape):
        self.w = self.add_weight(shape=(self.units,1), initializer='normal')
        self.b = self.add_weight(shape=(self.length,1), initializer='zeros')
        super(PayAttention, self).build(input_shape)

    def call(self, sentences):
        E = tf.nn.tanh(tf.keras.backend.dot(sentences,self.w)+self.b)
        A = tf.nn.softmax(E, axis=1)
        out= A*sentences
        return tf.keras.backend.sum(out, axis=1), A
 
    


In [38]:
class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz,inp_units, enc_length):
        super().__init__()
        self.batch_sz=batch_sz
        self.dec_units=dec_units
        self.embedding_dim=embedding_dim
        self.vocab_size=vocab_size 

        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim) 

        self.gru = tf.keras.layers.GRU(self.dec_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')

        self.fc = tf.keras.layers.Dense(vocab_size)        

    def call(self, context_vector, dec_input):
        # hidden is the hidden states of all the units in the encoder
        # context_vector is the context vector from the attention layer
        # dec_input is the input to the decoder

        # Now, first embed the decoder input
        x=self.embedding(dec_input)
        
        # Now we will concat the encoder input and the context vector
            #first expand the context vector
        context_vector=tf.expand_dims(context_vector,1)
        x = tf.concat([context_vector, x], axis=-1)

        # Pass through a GRU layer
        output,state = self.gru(x)

        # Pass through a dense layer to get the probabilities distribution over the target vocabulary
        output = tf.reshape(output, (-1, output.shape[2]))
        x = self.fc(output)

        return x, state 

In [39]:
#Now we define the loss function. The loss function is the cross entropy loss function. The cross entropy loss function is defined as follows:
#-sum(y_true * log(y_pred), axis=-1)
import numpy as np

optimizer = tf.optimizers.Adam()

def loss_function(real, pred):
    mask = 1 - np.equal(real, 0)
    loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=real, logits=pred) * mask
    return tf.reduce_mean(loss_)

In [40]:

#Now we run the training loop

optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

decoder=DecoderLayer(vocab_tar_size, embedding_dim, units, BATCH_SIZE, units, max_length_inp)
encoder=Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)
attention=PayAttention(units, max_length_inp)

import os
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder,
                                 attention=attention)

def train_step(inp, targ):
    loss=0

    with tf.GradientTape() as tape:
        
        # Initialize the hidden state of the encoder and pass the input to the encoder
        hidden_initialize = encoder.initialize_hidden_state()
        enc_output, enc_hidden = encoder(inp, hidden_initialize)

        #Now run the attention layer
        context_vector, attention_weights = attention(enc_output)
        dec_input = tf.expand_dims([targ_lang.word_index['<start>']] * BATCH_SIZE, 1)   

        # Teacher forcing - feeding the target as the next input
        for t in range(1, targ.shape[1]):
            # passing enc_output to the decoder
            predictions, dec_hidden = decoder(context_vector, dec_input)
            loss += loss_function(targ[:, t], predictions)
            # using teacher forcing
            dec_input = tf.expand_dims(targ[:, t], 1)
        

    batch_loss = (loss / int(targ.shape[1]))
    variables = encoder.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return batch_loss


    

In [42]:
print(steps_per_epoch)

81


In [78]:
EPOCHS=10
import time

for epoch in range(EPOCHS):
    start = time.time()
    total_loss = 0

    for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
        batch_loss = train_step(inp, targ)
        total_loss += batch_loss
        print(targ.shape)
        print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                         batch,
                                                         batch_loss.numpy()))
    # saving (checkpoint) the model every 2 epochs
    if (epoch + 1) % 2 == 0:
        checkpoint.save(file_prefix = checkpoint_prefix+str(epoch))

    print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                      total_loss / steps_per_epoch))
    print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))



(64, 21)
Epoch 1 Batch 0 Loss 0.3545
(64, 21)
Epoch 1 Batch 1 Loss 0.3561
(64, 21)
Epoch 1 Batch 2 Loss 0.3595
(64, 21)
Epoch 1 Batch 3 Loss 0.3273
(64, 21)
Epoch 1 Batch 4 Loss 0.3805
(64, 21)
Epoch 1 Batch 5 Loss 0.3417
(64, 21)
Epoch 1 Batch 6 Loss 0.3837
(64, 21)
Epoch 1 Batch 7 Loss 0.3203
(64, 21)
Epoch 1 Batch 8 Loss 0.3395
(64, 21)
Epoch 1 Batch 9 Loss 0.4266
(64, 21)
Epoch 1 Batch 10 Loss 0.3914
(64, 21)
Epoch 1 Batch 11 Loss 0.3714
(64, 21)
Epoch 1 Batch 12 Loss 0.4053
(64, 21)
Epoch 1 Batch 13 Loss 0.4689
(64, 21)
Epoch 1 Batch 14 Loss 0.3660
(64, 21)
Epoch 1 Batch 15 Loss 0.3301
(64, 21)
Epoch 1 Batch 16 Loss 0.3784
(64, 21)
Epoch 1 Batch 17 Loss 0.4065
(64, 21)
Epoch 1 Batch 18 Loss 0.4588
(64, 21)
Epoch 1 Batch 19 Loss 0.3710
(64, 21)
Epoch 1 Batch 20 Loss 0.3208
(64, 21)
Epoch 1 Batch 21 Loss 0.3602
(64, 21)
Epoch 1 Batch 22 Loss 0.3885
(64, 21)
Epoch 1 Batch 23 Loss 0.4281
(64, 21)
Epoch 1 Batch 24 Loss 0.5082
(64, 21)
Epoch 1 Batch 25 Loss 0.3652
(64, 21)
Epoch 1 Batch

2024-05-16 13:51:14.726170: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


(64, 21)
Epoch 2 Batch 0 Loss 0.2604
(64, 21)
Epoch 2 Batch 1 Loss 0.2697
(64, 21)
Epoch 2 Batch 2 Loss 0.3053
(64, 21)
Epoch 2 Batch 3 Loss 0.2343
(64, 21)
Epoch 2 Batch 4 Loss 0.2526
(64, 21)
Epoch 2 Batch 5 Loss 0.3107
(64, 21)
Epoch 2 Batch 6 Loss 0.3914
(64, 21)
Epoch 2 Batch 7 Loss 0.3136
(64, 21)
Epoch 2 Batch 8 Loss 0.3126
(64, 21)
Epoch 2 Batch 9 Loss 0.3154
(64, 21)
Epoch 2 Batch 10 Loss 0.2479
(64, 21)
Epoch 2 Batch 11 Loss 0.3021
(64, 21)
Epoch 2 Batch 12 Loss 0.3467
(64, 21)
Epoch 2 Batch 13 Loss 0.2448
(64, 21)
Epoch 2 Batch 14 Loss 0.2674
(64, 21)
Epoch 2 Batch 15 Loss 0.2741
(64, 21)
Epoch 2 Batch 16 Loss 0.2777
(64, 21)
Epoch 2 Batch 17 Loss 0.2748
(64, 21)
Epoch 2 Batch 18 Loss 0.2902
(64, 21)
Epoch 2 Batch 19 Loss 0.3032
(64, 21)
Epoch 2 Batch 20 Loss 0.3355
(64, 21)
Epoch 2 Batch 21 Loss 0.3702
(64, 21)
Epoch 2 Batch 22 Loss 0.2954
(64, 21)
Epoch 2 Batch 23 Loss 0.2933
(64, 21)
Epoch 2 Batch 24 Loss 0.3080
(64, 21)
Epoch 2 Batch 25 Loss 0.3197
(64, 21)
Epoch 2 Batch

2024-05-16 13:54:37.368806: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 2 Loss 0.3138
Time taken for 1 epoch 203.19978213310242 sec

(64, 21)
Epoch 3 Batch 0 Loss 0.2391
(64, 21)
Epoch 3 Batch 1 Loss 0.1855
(64, 21)
Epoch 3 Batch 2 Loss 0.1952
(64, 21)
Epoch 3 Batch 3 Loss 0.2495
(64, 21)
Epoch 3 Batch 4 Loss 0.2102
(64, 21)
Epoch 3 Batch 5 Loss 0.2573
(64, 21)
Epoch 3 Batch 6 Loss 0.2098
(64, 21)
Epoch 3 Batch 7 Loss 0.1900
(64, 21)
Epoch 3 Batch 8 Loss 0.1946
(64, 21)
Epoch 3 Batch 9 Loss 0.2215
(64, 21)
Epoch 3 Batch 10 Loss 0.2541
(64, 21)
Epoch 3 Batch 11 Loss 0.2101
(64, 21)
Epoch 3 Batch 12 Loss 0.2926
(64, 21)
Epoch 3 Batch 13 Loss 0.2614
(64, 21)
Epoch 3 Batch 14 Loss 0.2603
(64, 21)
Epoch 3 Batch 15 Loss 0.2173
(64, 21)
Epoch 3 Batch 16 Loss 0.2570
(64, 21)
Epoch 3 Batch 17 Loss 0.2495
(64, 21)
Epoch 3 Batch 18 Loss 0.2070
(64, 21)
Epoch 3 Batch 19 Loss 0.2830
(64, 21)
Epoch 3 Batch 20 Loss 0.2480
(64, 21)
Epoch 3 Batch 21 Loss 0.2313
(64, 21)
Epoch 3 Batch 22 Loss 0.2517
(64, 21)
Epoch 3 Batch 23 Loss 0.2632
(64, 21)
Epoch 3 Batch 24 Loss 

2024-05-16 13:57:59.072532: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


(64, 21)
Epoch 4 Batch 0 Loss 0.1658
(64, 21)
Epoch 4 Batch 1 Loss 0.1740
(64, 21)
Epoch 4 Batch 2 Loss 0.2049
(64, 21)
Epoch 4 Batch 3 Loss 0.1425
(64, 21)
Epoch 4 Batch 4 Loss 0.1619
(64, 21)
Epoch 4 Batch 5 Loss 0.2137
(64, 21)
Epoch 4 Batch 6 Loss 0.1842
(64, 21)
Epoch 4 Batch 7 Loss 0.1942
(64, 21)
Epoch 4 Batch 8 Loss 0.2065
(64, 21)
Epoch 4 Batch 9 Loss 0.2384
(64, 21)
Epoch 4 Batch 10 Loss 0.1683
(64, 21)
Epoch 4 Batch 11 Loss 0.1924
(64, 21)
Epoch 4 Batch 12 Loss 0.1948
(64, 21)
Epoch 4 Batch 13 Loss 0.2152
(64, 21)
Epoch 4 Batch 14 Loss 0.1941
(64, 21)
Epoch 4 Batch 15 Loss 0.1654
(64, 21)
Epoch 4 Batch 16 Loss 0.2231
(64, 21)
Epoch 4 Batch 17 Loss 0.1933
(64, 21)
Epoch 4 Batch 18 Loss 0.1815
(64, 21)
Epoch 4 Batch 19 Loss 0.2096
(64, 21)
Epoch 4 Batch 20 Loss 0.1994
(64, 21)
Epoch 4 Batch 21 Loss 0.1693
(64, 21)
Epoch 4 Batch 22 Loss 0.1785
(64, 21)
Epoch 4 Batch 23 Loss 0.1797
(64, 21)
Epoch 4 Batch 24 Loss 0.2202
(64, 21)
Epoch 4 Batch 25 Loss 0.1770
(64, 21)
Epoch 4 Batch

2024-05-16 14:01:11.200386: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 4 Loss 0.2042
Time taken for 1 epoch 192.72276401519775 sec

(64, 21)
Epoch 5 Batch 0 Loss 0.1468
(64, 21)
Epoch 5 Batch 1 Loss 0.1446
(64, 21)
Epoch 5 Batch 2 Loss 0.1378
(64, 21)
Epoch 5 Batch 3 Loss 0.1198
(64, 21)
Epoch 5 Batch 4 Loss 0.1464
(64, 21)
Epoch 5 Batch 5 Loss 0.1521
(64, 21)
Epoch 5 Batch 6 Loss 0.1272
(64, 21)
Epoch 5 Batch 7 Loss 0.1381
(64, 21)
Epoch 5 Batch 8 Loss 0.1620
(64, 21)
Epoch 5 Batch 9 Loss 0.1264
(64, 21)
Epoch 5 Batch 10 Loss 0.1352
(64, 21)
Epoch 5 Batch 11 Loss 0.1310
(64, 21)
Epoch 5 Batch 12 Loss 0.1751
(64, 21)
Epoch 5 Batch 13 Loss 0.1602
(64, 21)
Epoch 5 Batch 14 Loss 0.1212
(64, 21)
Epoch 5 Batch 15 Loss 0.1463
(64, 21)
Epoch 5 Batch 16 Loss 0.1413
(64, 21)
Epoch 5 Batch 17 Loss 0.1827
(64, 21)
Epoch 5 Batch 18 Loss 0.2075
(64, 21)
Epoch 5 Batch 19 Loss 0.1204
(64, 21)
Epoch 5 Batch 20 Loss 0.1317
(64, 21)
Epoch 5 Batch 21 Loss 0.1302
(64, 21)
Epoch 5 Batch 22 Loss 0.1428
(64, 21)
Epoch 5 Batch 23 Loss 0.1545
(64, 21)
Epoch 5 Batch 24 Loss 

2024-05-16 14:04:21.220694: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


(64, 21)
Epoch 6 Batch 0 Loss 0.1090
(64, 21)
Epoch 6 Batch 1 Loss 0.0923
(64, 21)
Epoch 6 Batch 2 Loss 0.1112
(64, 21)
Epoch 6 Batch 3 Loss 0.1216
(64, 21)
Epoch 6 Batch 4 Loss 0.1268
(64, 21)
Epoch 6 Batch 5 Loss 0.1268
(64, 21)
Epoch 6 Batch 6 Loss 0.1183
(64, 21)
Epoch 6 Batch 7 Loss 0.1249
(64, 21)
Epoch 6 Batch 8 Loss 0.1244
(64, 21)
Epoch 6 Batch 9 Loss 0.0962
(64, 21)
Epoch 6 Batch 10 Loss 0.1034
(64, 21)
Epoch 6 Batch 11 Loss 0.1055
(64, 21)
Epoch 6 Batch 12 Loss 0.1124
(64, 21)
Epoch 6 Batch 13 Loss 0.1545
(64, 21)
Epoch 6 Batch 14 Loss 0.1282
(64, 21)
Epoch 6 Batch 15 Loss 0.1166
(64, 21)
Epoch 6 Batch 16 Loss 0.1242
(64, 21)
Epoch 6 Batch 17 Loss 0.1103
(64, 21)
Epoch 6 Batch 18 Loss 0.0986
(64, 21)
Epoch 6 Batch 19 Loss 0.1459
(64, 21)
Epoch 6 Batch 20 Loss 0.1147
(64, 21)
Epoch 6 Batch 21 Loss 0.1169
(64, 21)
Epoch 6 Batch 22 Loss 0.1513
(64, 21)
Epoch 6 Batch 23 Loss 0.1237
(64, 21)
Epoch 6 Batch 24 Loss 0.0847
(64, 21)
Epoch 6 Batch 25 Loss 0.1109
(64, 21)
Epoch 6 Batch

2024-05-16 14:07:30.688292: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 6 Loss 0.1351
Time taken for 1 epoch 190.0282850265503 sec

(64, 21)
Epoch 7 Batch 0 Loss 0.1012
(64, 21)
Epoch 7 Batch 1 Loss 0.0961
(64, 21)
Epoch 7 Batch 2 Loss 0.1118
(64, 21)
Epoch 7 Batch 3 Loss 0.1145
(64, 21)
Epoch 7 Batch 4 Loss 0.1123
(64, 21)
Epoch 7 Batch 5 Loss 0.1033
(64, 21)
Epoch 7 Batch 6 Loss 0.1264
(64, 21)
Epoch 7 Batch 7 Loss 0.1079
(64, 21)
Epoch 7 Batch 8 Loss 0.0930
(64, 21)
Epoch 7 Batch 9 Loss 0.1118
(64, 21)
Epoch 7 Batch 10 Loss 0.0960
(64, 21)
Epoch 7 Batch 11 Loss 0.1146
(64, 21)
Epoch 7 Batch 12 Loss 0.0974
(64, 21)
Epoch 7 Batch 13 Loss 0.0890
(64, 21)
Epoch 7 Batch 14 Loss 0.0931
(64, 21)
Epoch 7 Batch 15 Loss 0.0998
(64, 21)
Epoch 7 Batch 16 Loss 0.0926
(64, 21)
Epoch 7 Batch 17 Loss 0.1162
(64, 21)
Epoch 7 Batch 18 Loss 0.1061
(64, 21)
Epoch 7 Batch 19 Loss 0.0956
(64, 21)
Epoch 7 Batch 20 Loss 0.1080
(64, 21)
Epoch 7 Batch 21 Loss 0.1144
(64, 21)
Epoch 7 Batch 22 Loss 0.1150
(64, 21)
Epoch 7 Batch 23 Loss 0.1135
(64, 21)
Epoch 7 Batch 24 Loss 0

2024-05-16 14:10:42.983198: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


(64, 21)
Epoch 8 Batch 0 Loss 0.0837
(64, 21)
Epoch 8 Batch 1 Loss 0.0858
(64, 21)
Epoch 8 Batch 2 Loss 0.0922
(64, 21)
Epoch 8 Batch 3 Loss 0.0715
(64, 21)
Epoch 8 Batch 4 Loss 0.0800
(64, 21)
Epoch 8 Batch 5 Loss 0.0706
(64, 21)
Epoch 8 Batch 6 Loss 0.0914
(64, 21)
Epoch 8 Batch 7 Loss 0.0805
(64, 21)
Epoch 8 Batch 8 Loss 0.0647
(64, 21)
Epoch 8 Batch 9 Loss 0.0939
(64, 21)
Epoch 8 Batch 10 Loss 0.1142
(64, 21)
Epoch 8 Batch 11 Loss 0.1093
(64, 21)
Epoch 8 Batch 12 Loss 0.0917
(64, 21)
Epoch 8 Batch 13 Loss 0.0984
(64, 21)
Epoch 8 Batch 14 Loss 0.0749
(64, 21)
Epoch 8 Batch 15 Loss 0.0746
(64, 21)
Epoch 8 Batch 16 Loss 0.0871
(64, 21)
Epoch 8 Batch 17 Loss 0.0889
(64, 21)
Epoch 8 Batch 18 Loss 0.0999
(64, 21)
Epoch 8 Batch 19 Loss 0.1094
(64, 21)
Epoch 8 Batch 20 Loss 0.0912
(64, 21)
Epoch 8 Batch 21 Loss 0.0828
(64, 21)
Epoch 8 Batch 22 Loss 0.0844
(64, 21)
Epoch 8 Batch 23 Loss 0.0828
(64, 21)
Epoch 8 Batch 24 Loss 0.1141
(64, 21)
Epoch 8 Batch 25 Loss 0.0726
(64, 21)
Epoch 8 Batch

2024-05-16 14:13:54.931194: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 8 Loss 0.0979
Time taken for 1 epoch 192.54406118392944 sec

(64, 21)
Epoch 9 Batch 0 Loss 0.0722
(64, 21)
Epoch 9 Batch 1 Loss 0.0600
(64, 21)
Epoch 9 Batch 2 Loss 0.0578
(64, 21)
Epoch 9 Batch 3 Loss 0.0900
(64, 21)
Epoch 9 Batch 4 Loss 0.0737
(64, 21)
Epoch 9 Batch 5 Loss 0.0656
(64, 21)
Epoch 9 Batch 6 Loss 0.0988
(64, 21)
Epoch 9 Batch 7 Loss 0.0675
(64, 21)
Epoch 9 Batch 8 Loss 0.0688
(64, 21)
Epoch 9 Batch 9 Loss 0.0756
(64, 21)
Epoch 9 Batch 10 Loss 0.0810
(64, 21)
Epoch 9 Batch 11 Loss 0.0837
(64, 21)
Epoch 9 Batch 12 Loss 0.0761
(64, 21)
Epoch 9 Batch 13 Loss 0.0756
(64, 21)
Epoch 9 Batch 14 Loss 0.0619
(64, 21)
Epoch 9 Batch 15 Loss 0.0979
(64, 21)
Epoch 9 Batch 16 Loss 0.0854
(64, 21)
Epoch 9 Batch 17 Loss 0.0760
(64, 21)
Epoch 9 Batch 18 Loss 0.0692
(64, 21)
Epoch 9 Batch 19 Loss 0.0833
(64, 21)
Epoch 9 Batch 20 Loss 0.0779
(64, 21)
Epoch 9 Batch 21 Loss 0.0866
(64, 21)
Epoch 9 Batch 22 Loss 0.0711
(64, 21)
Epoch 9 Batch 23 Loss 0.0897
(64, 21)
Epoch 9 Batch 24 Loss 

2024-05-16 14:17:07.917332: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


(64, 21)
Epoch 10 Batch 0 Loss 0.0730
(64, 21)
Epoch 10 Batch 1 Loss 0.0765
(64, 21)
Epoch 10 Batch 2 Loss 0.0546
(64, 21)
Epoch 10 Batch 3 Loss 0.0623
(64, 21)
Epoch 10 Batch 4 Loss 0.0483
(64, 21)
Epoch 10 Batch 5 Loss 0.0548
(64, 21)
Epoch 10 Batch 6 Loss 0.0641
(64, 21)
Epoch 10 Batch 7 Loss 0.0663
(64, 21)
Epoch 10 Batch 8 Loss 0.0852
(64, 21)
Epoch 10 Batch 9 Loss 0.0675
(64, 21)
Epoch 10 Batch 10 Loss 0.0396
(64, 21)
Epoch 10 Batch 11 Loss 0.0684
(64, 21)
Epoch 10 Batch 12 Loss 0.0569
(64, 21)
Epoch 10 Batch 13 Loss 0.0755
(64, 21)
Epoch 10 Batch 14 Loss 0.0674
(64, 21)
Epoch 10 Batch 15 Loss 0.0695
(64, 21)
Epoch 10 Batch 16 Loss 0.0782
(64, 21)
Epoch 10 Batch 17 Loss 0.0626
(64, 21)
Epoch 10 Batch 18 Loss 0.0674
(64, 21)
Epoch 10 Batch 19 Loss 0.0725
(64, 21)
Epoch 10 Batch 20 Loss 0.0690
(64, 21)
Epoch 10 Batch 21 Loss 0.0741
(64, 21)
Epoch 10 Batch 22 Loss 0.0897
(64, 21)
Epoch 10 Batch 23 Loss 0.0749
(64, 21)
Epoch 10 Batch 24 Loss 0.0732
(64, 21)
Epoch 10 Batch 25 Loss 0.0

2024-05-16 14:20:20.789721: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 10 Loss 0.0779
Time taken for 1 epoch 193.48266696929932 sec



In [79]:
checkpoint.save(file_prefix = checkpoint_prefix)

'./training_checkpoints/ckpt-12'

In [85]:
max_seq_length = 20

#Now we will generate the translation of the input sentence

def evaluate(sentence):
    
    sentence = preprocess_sentence(sentence)
    sentence = inp_lang.texts_to_sequences([sentence])

    # Pad the sentence to the max_length_inp

    sentence = tf.keras.preprocessing.sequence.pad_sequences(sentence,
                                                            maxlen=max_length_inp,
                                                            padding='post')

    hidden_initialize = encoder.initialize_hidden_state()
    enc_output, enc_hidden = encoder(sentence, hidden_initialize)

    context_vector, attention_weights = attention(enc_output)

    context_vector=context_vector[0]
    context_vector=tf.expand_dims(context_vector,0) 
    print(context_vector.shape)


    dec_input = tf.expand_dims([targ_lang.word_index['<start>']], 1) 
    print(dec_input.shape)
    result = []
    #Disable teacher forcing

    for i in range(max_seq_length):
        predictions, dec_hidden = decoder(context_vector, dec_input)
        predicted_id = tf.argmax(predictions[0]).numpy()

        result.append(targ_lang.index_word[predicted_id])

        if targ_lang.index_word[predicted_id] == '<end>':
            return result

        # the predicted ID is fed back into the model
        dec_input = tf.expand_dims([predicted_id], 0)

    return result


evaluate("What's your name?")



(1, 1024)
(1, 1)


['তোর', 'নাম', 'কি', '?', '<end>']