In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import unidecode
import numpy as np
import tensorflow as tf
from tensorflow import keras
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense,GRU,Bidirectional,Embedding

In [None]:
# Sequence / corpus configuration shared by the rest of the notebook.
max_word=18+2   # 18 word slots plus the '#' start and '$' end markers
samples=50000   # number of sentence pairs kept from the CSV
vocab=20000     # vocabulary cap handed to the tokenizers and the model layers
BATCH=64        # training batch size

import csv
engtxt=[]
hintxt=[]
# newline='' is what the csv module asks for when it is given a file object:
# without it, quoted fields that contain line breaks are not parsed correctly.
with open('data.csv','r',encoding='utf8',newline='') as f:
    reader=csv.reader(f)
    for row in reader:
        # Column 1 is collected as the English text and column 2 as the Hindi
        # text (column 0 is not used).
        engtxt.append(row[1])
        hintxt.append(row[2])

In [None]:
# Keep the first `samples` sentence pairs. Slicing with `samples` (instead of a
# second hard-coded 50000) keeps this cell in step with the later
# reshape-to-(samples, max_word) calls when the constant is changed.
eng=engtxt[:samples]
hin=hintxt[:samples]
if len(eng)<samples:
    # Fail here with a clear message rather than in a later np.reshape.
    raise ValueError('data.csv provides only {} sentence pairs but samples={}'.format(len(eng),samples))
#hin=[unidecode.unidecode(sent) for sent in hin]

# Print one pair as a quick sanity check of the alignment.
print(eng[4]+'--->'+hin[4])

In [None]:
# Characters stripped by both tokenizers. '#' and '$' are deliberately absent
# from this list so they survive as the start / end marker tokens.
_TOKEN_FILTERS='!"%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'

# ---------------------------------------------------------------- English
# The markers are added only to the fitting corpus so they receive ids;
# `eng` itself is left untouched.
tokenizerE=Tokenizer(num_words=vocab,oov_token='<OOV>',lower=True,filters=_TOKEN_FILTERS)
tokenizerE.fit_on_texts(['#','$']+eng)
if max(tokenizerE.word_index['#'],tokenizerE.word_index['$'])>=vocab:
    # Marker ids are fed straight into Embedding(vocab, ...), so they must be < vocab.
    raise ValueError('English start/end marker ids fall outside the vocabulary of size {}'.format(vocab))
eng_seq=tokenizerE.texts_to_sequences(eng)
eng_pad=pad_sequences(eng_seq,maxlen=max_word-2,truncating='post',padding='post')
# Encoder layout is unchanged: '#', the 18 padded word slots, then '$'.
# The inference cells build their encoder input the same way.
eng_start=np.full((len(eng_pad),1),tokenizerE.word_index['#'])
eng_end=np.full((len(eng_pad),1),tokenizerE.word_index['$'])
eng_inp=np.concatenate([eng_start,eng_pad,eng_end],axis=1)
eng_inp=np.reshape(eng_inp,(samples,max_word)).astype('float32')

# ---------------------------------------------------------------- Hindi
tokenizerB=Tokenizer(num_words=vocab,oov_token='<OOV>',lower=False,filters=_TOKEN_FILTERS)
tokenizerB.fit_on_texts(['#','$']+hin)
if max(tokenizerB.word_index['#'],tokenizerB.word_index['$'])>=vocab:
    # Marker ids are used as decoder inputs and as labels for a Dense(vocab) output.
    raise ValueError('Hindi start/end marker ids fall outside the vocabulary of size {}'.format(vocab))
hin_seq=tokenizerB.texts_to_sequences(hin)
hin_pad=pad_sequences(hin_seq,maxlen=max_word-2,truncating='post',padding='post')
# Target layout: '#', the words, '$', THEN zero padding. Previously '$' was
# appended after the padding, so the only '$' target sat at the last position
# behind masked zeros and the decoder was never trained to emit '$' right
# after the final word, which is what the inference cells wait for.
hin_start=tokenizerB.word_index['#']
hin_end=tokenizerB.word_index['$']
hin_framed=[[hin_start]+seq[:max_word-2]+[hin_end] for seq in hin_seq]
hin_inp=pad_sequences(hin_framed,maxlen=max_word,truncating='post',padding='post')
hin_inp=np.reshape(hin_inp,(samples,max_word)).astype('float32')

# id -> word lookup used to turn predicted ids back into Hindi tokens.
rev_hin_dict={idx:word for word,idx in tokenizerB.word_index.items()}

In [None]:
class Encoder(tf.keras.Model):
    """Embeds a source sequence and runs it through two bidirectional GRUs.

    The first GRU only contributes its final states, which seed the second
    GRU; both layers read the same embedded input.
    """

    def __init__(self,vocab,BATCH):
        """Create the embedding and the two bidirectional GRU layers.

        vocab: size of the embedding table.
        BATCH: batch size used by `initialise_hidden_unit`.
        """
        super().__init__()
        self.vocab=vocab
        self.BATCH=BATCH
        self.embed=Embedding(self.vocab,256)
        # Both recurrent layers share the same configuration.
        # NOTE(review): `call` takes no `training` argument, so whether
        # dropout=0.5 is ever active depends on how Keras resolves the
        # training phase for these inner layers - confirm.
        gru_options=dict(return_state=True,return_sequences=True,
                         recurrent_initializer='glorot_uniform',dropout=0.5)
        self.gru=Bidirectional(GRU(256,**gru_options))
        self.gru1=Bidirectional(GRU(256,**gru_options))

    def call(self,encoder_inp,hidden):
        """Encode a batch of token ids.

        encoder_inp: token ids passed to the embedding layer.
        hidden: list of two initial states for the first bidirectional GRU.
        Returns the second GRU's output sequence and its two final states
        concatenated along axis 1.
        """
        embedded=self.embed(encoder_inp)
        # Layer 1: the output sequence is discarded, only the two states are kept.
        _unused_seq,first_state_a,first_state_b=self.gru(embedded,initial_state=hidden)
        # Layer 2 re-reads the embeddings, starting from layer 1's final states.
        encoder_out,last_state_a,last_state_b=self.gru1(
            embedded,initial_state=[first_state_a,first_state_b])
        merged_state=tf.concat([last_state_a,last_state_b],axis=1)
        return encoder_out,merged_state

    def initialise_hidden_unit(self):
        """Return two all-zero (BATCH, 256) tensors to use as the initial state."""
        state_shape=(self.BATCH,256)
        return [tf.zeros(state_shape),tf.zeros(state_shape)]

In [None]:
class Decoder(tf.keras.Model):
    """Single-step attention decoder: additive attention, a GRU, then a vocabulary projection."""

    def __init__(self,vocab):
        """Create the layers; `vocab` sizes both the embedding table and the output layer."""
        super().__init__()
        self.vocab=vocab
        self.embed=Embedding(self.vocab,256)
        # dense / dense1 / dense2 together form the attention scorer.
        self.dense=Dense(512)    # projects the carried state
        self.dense1=Dense(512)   # projects the encoder outputs
        self.dense2=Dense(1)     # collapses each position to a single score
        self.gru=GRU(512,return_sequences=True,return_state=True,
                     recurrent_initializer='glorot_uniform',dropout=0.5)
        self.dense3=Dense(self.vocab)   # no activation: the outputs are logits

    def call(self,decoder_inp,encoder_out,carry):
        """Run one decoding step.

        decoder_inp: ids of the previous target tokens.
        encoder_out: encoder output sequence that is attended over.
        carry: state vector used as the attention query.
        Returns (logits, new GRU state, attention weights).
        """
        token_embedding=self.embed(decoder_inp)
        # Add an axis so the query broadcasts against the encoder sequence.
        query=tf.expand_dims(carry,1)
        #----------------------------------------------------------------
        #attention
        projected_keys=self.dense1(encoder_out)
        projected_query=self.dense(query)
        score=self.dense2(tf.math.tanh(projected_keys+projected_query))
        # Normalise across axis 1 of the encoder outputs.
        attention_weights=tf.nn.softmax(score,axis=1)
        context_vector=tf.math.reduce_sum(attention_weights*encoder_out,axis=1,keepdims=True)
        merged_vector=tf.concat([context_vector,token_embedding],axis=-1)
        #-----------------------------------------------------------------
        # The GRU is called without an initial_state; `carry` only feeds the attention.
        gru_out,decoder_state=self.gru(merged_vector)

        # Flatten the sequence axis before projecting to the vocabulary.
        flat_out=tf.reshape(gru_out,(-1,gru_out.shape[2]))
        logits=self.dense3(flat_out)
        return logits,decoder_state,attention_weights

In [None]:
# Build the two halves of the seq2seq model; both use the same `vocab` size.
encoder=Encoder(vocab,BATCH)
decoder=Decoder(vocab)

# Adam with its default settings.
optimizer=tf.keras.optimizers.Adam()
# from_logits=True because the decoder's last Dense layer has no activation;
# reduction='none' keeps per-example losses so loss_function can mask padding.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True,reduction='none')

def loss_function(real, pred):
    """Cross-entropy between `real` ids and `pred` logits, ignoring padding.

    Entries of `real` equal to 0 contribute zero loss. The mean is taken over
    every entry, masked ones included.
    """
    per_example = loss_object(real, pred)
    # 1.0 where the target is a real token, 0.0 where it is padding (id 0).
    keep = tf.cast(tf.math.not_equal(real, 0), dtype=per_example.dtype)
    return tf.reduce_mean(per_example * keep)

In [None]:
#create checkpoint
import os
# The default is the original machine-specific location; set the
# NMT_CHECKPOINT_DIR environment variable to run the notebook elsewhere
# without editing this cell.
checkpoint_dir = os.environ.get('NMT_CHECKPOINT_DIR', 'E:/Jupyter files/ROUGH/training_checkpoints')
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
# Track optimizer state together with both models so training can be resumed.
checkpoint = tf.train.Checkpoint(optimizer=optimizer, encoder=encoder, decoder=decoder)

In [None]:
#training loop
@tf.function
def train(inp,out,hidden,vocab,max_word):
    """Run one teacher-forced optimisation step on a batch.

    inp: source token ids for the batch.
    out: target token ids; column 0 is the start marker, columns
        1..max_word-1 are the prediction targets.
    hidden: initial encoder state (see Encoder.initialise_hidden_unit).
    vocab: unused; kept so existing callers keep working.
    max_word: target sequence length, i.e. the number of columns in `out`.
    Returns the summed per-step loss divided by `max_word`.
    """
    loss=0
    with tf.GradientTape() as tape:
        enc_out,enc_state=encoder(inp,hidden)
        # The first attention query is the encoder's final state.
        dec_state=enc_state
        # Every sequence starts from the '#' marker.
        dec_input=tf.expand_dims([tokenizerB.word_index['#']] * BATCH, 1)
        dec_input=tf.cast(dec_input,'float32')
        for step in range (1,max_word):
            logits,new_state,_=decoder(dec_input,enc_out,dec_state)
            loss+=loss_function(out[:, step], logits)
            # Teacher forcing: the next input is the ground-truth token.
            dec_input=tf.expand_dims(out[:, step], 1)
            dec_input=tf.cast(dec_input,'float32')
            # Feed the decoder state forward. It used to be discarded, so the
            # attention always queried the encoder's final state in training
            # while the inference cells feed the decoder state back each step.
            dec_state=new_state
    variables = encoder.trainable_variables+decoder.trainable_variables
    gradients=tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    batch_loss = loss / max_word
    return batch_loss

In [None]:
# Number of passes over the training data.
EPOCH=15
# Short aliases for the encoder inputs and decoder targets; `e` is also read
# by the evaluation cell further down.
e,b=eng_inp,hin_inp

# Shuffle over the whole corpus, then cut into full batches only.
dataset = (
    tf.data.Dataset.from_tensor_slices((e,b))
    .shuffle(samples)
    .batch(BATCH,drop_remainder=True)
)

In [None]:
import time
# Number of full batches per epoch (the dataset drops the remainder).
steps_per_epoch=samples//BATCH
for epoch in range(EPOCH):
    print("Starting epoch {}".format(epoch+1))
    time1=time.time()

    # Start each epoch from a zero encoder state.
    hidden = encoder.initialise_hidden_unit()
    total_loss = 0
    for x_batch,y_batch in dataset:
#----------------------------------------------------
#calling the training loop
        batch_loss = train(x_batch, y_batch, hidden,vocab,max_word)
#----------------------------------------------------
        total_loss += batch_loss

    time2=time.time()
    timediff=time2-time1
    # Save every second epoch AND after the last one. With an odd EPOCH the
    # final epoch was previously never checkpointed, so restoring the latest
    # checkpoint silently dropped the last epoch of training.
    if (epoch + 1) % 2 == 0 or (epoch + 1) == EPOCH:
        checkpoint.save(file_prefix = checkpoint_prefix)

    print('loss = {:.4f}\ttime taken = {:.2f} secs'.format(total_loss/steps_per_epoch,timediff))
  

In [None]:
#checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

In [None]:
# English sentence that the next cell tokenises and translates.
test="you are a happy man"

In [None]:
test_ori=test
#--------------------------------------------------------
# Encode the sentence the same way the training inputs were built:
# '#', 18 padded word slots, '$'. The intermediate arrays get their own names
# so `test` keeps the raw sentence and this cell can be re-run on its own.
test_seq=tokenizerE.texts_to_sequences([test_ori])
test_pad=pad_sequences(test_seq,maxlen=max_word-2,truncating='post',padding='post')
tst=np.concatenate(([tokenizerE.word_index['#']],test_pad[0],[tokenizerE.word_index['$']]),axis=0)
tst=np.reshape(tst,(max_word)).astype('float32')
#--------------------------------------------------------
test_inp=np.reshape(tst,(1,max_word))
hidd=[tf.zeros((1,256)) for i in range(2)]
testenou,testenhi=encoder(test_inp,hidd)
# Decoding starts from the '#' marker.
testbi=tf.expand_dims([tokenizerB.word_index['#']] * 1, 1)
testbi=tf.cast(testbi,'float32')
pred=''
apn_att_wt=np.array([])
for i in range (1,max_word):
    testdo,testds,att_wt=decoder(testbi,testenou,testenhi)
    apn_att_wt=np.append(apn_att_wt,att_wt)
    # Greedy decoding: take the highest-scoring id.
    m=int(tf.math.argmax(testdo[0]).numpy())
    # .get() because id 0 (padding) and ids beyond the fitted vocabulary have
    # no entry in rev_hin_dict; a plain lookup would raise KeyError.
    word=rev_hin_dict.get(m,'<OOV>')
    pred+=word+' '
    if word=='$':
        # End marker reached: nothing after it is printed, so stop decoding.
        break
    testbi=tf.expand_dims([m] * 1, 1)
    testbi=tf.cast(testbi,'float32')
    testenhi=testds
#----------------------------------------------------------
# Keep only the words that precede the end marker.
pre=''
for word in pred.split():
    if word=='$':
        break
    else:
        pre+=word+' '
print(f"eng  =  {test_ori}\npred  =  {pre}")

In [None]:
#to evaluate within training data
p=55
for t in range(p,p+1):
    # A dedicated name is used so the `test` sentence of the free-text
    # translation cell is not overwritten.
    sample_inp=np.reshape(e[t],(1,max_word))
    hidd=[tf.zeros((1,256)) for i in range(2)]
    testenou,testenhi=encoder(sample_inp,hidd)
    testbi=tf.expand_dims([tokenizerB.word_index['#']] * 1, 1)
    testbi=tf.cast(testbi,'float32')
    pred=''
    apn_att_wt=np.array([])
    for i in range (1,max_word):
        testdo,testds,att_wt=decoder(testbi,testenou,testenhi)
        # Each step appends one weight per source position, in step order.
        apn_att_wt=np.append(apn_att_wt,att_wt)
        m=int(tf.math.argmax(testdo[0]).numpy())

        # .get() because id 0 (padding) and ids beyond the fitted vocabulary
        # have no entry in rev_hin_dict; a plain lookup would raise KeyError.
        pred+=rev_hin_dict.get(m,'<OOV>')+' '
        testbi=tf.expand_dims([m] * 1, 1)
        testbi=tf.cast(testbi,'float32')
        testenhi=testds
    print(f"eng  =  {eng[t]}\nhin  =  {hin[t]}\npred  =  {pred}")

    sentence=eng[t].split()
    predicted_sentence=pred.split()
#-------------------------------------------------------
#to create the plot
# The weights were collected step by step, so the flat buffer is laid out as
# (decoding step, source position) = (max_word-1, max_word). Reshaping it
# straight to (max_word, max_word-1) scrambled the matrix; reshape in
# collection order, then transpose to get source rows and predicted columns.
attention=np.reshape(apn_att_wt,(max_word-1,max_word)).T
fig = plt.figure(figsize=(5,5))
ax = fig.add_subplot(1,1,1)
ax.matshow(attention, cmap='viridis')
fontdict = {'fontsize': 14}

# Row 0 of the encoder input is the '#' start marker, so it gets its own
# label; remaining rows (padding and '$') stay unlabelled.
# NOTE(review): whitespace-split words only approximate the tokenizer's tokens.
source_labels=(['#']+sentence)[:attention.shape[0]]
target_labels=predicted_sentence[:attention.shape[1]]

# Pin tick positions explicitly so each label sits on its own row / column.
ax.set_xticks(np.arange(len(target_labels)))
ax.set_xticklabels(target_labels, fontdict=fontdict, rotation=90)
ax.set_yticks(np.arange(len(source_labels)))
ax.set_yticklabels(source_labels, fontdict=fontdict)

plt.show()
#--------------------------------------------------------