## *Machine Transliteration of Named Entities from Hindi to English*
***(using Conditional Random Fields and Neural networks)***

***Author : Dhawal Darji***

### Libraries & Imports

In [87]:
# uncomment one of these versions (depending on whether you are on a computer with a CPU or not)

# GPU version
# !conda install --yes --prefix {sys.prefix} pytorch torchvision cudatoolkit=10.2 -c pytorch

# Just CPU
#!conda install --yes --prefix {sys.prefix} pytorch torchvision cpuonly -c pytorch

# uncomment the following to install required libraries on first run

# !conda install --yes --prefix {sys.prefix} einops  -c conda-forge
#!conda install --yes --prefix {sys.prefix} numpy
#!conda install --yes --prefix {sys.prefix} scipy
#!conda install --yes --prefix {sys.prefix} pandas

import sys
import numpy as np
import torch
import einops
import random
import torch.nn as nn
import torch.nn.functional as fun
import torch.optim as opt
from torch.autograd import Variable
from torch.utils.data import Dataset
from datetime import datetime

# Custom utils

'''Syllabifier takes input as a hindi-english word pair and returns syllabified hindi-english words'''
import syllabifier as sb

'''dataLoader contains utility functions to load/split/manipulate data'''
import dataLoader as dl

# Assign gpu if one is available
gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [88]:
# torch test
import torch

print("GPU/CUDA available? ", torch.cuda.is_available())
print("Torch version", torch.__version__)
print(gpu)

GPU/CUDA available?  True
Torch version 1.7.0
cuda:0


### Vocabularies

In [89]:
# Creating a English and Hindi Vocabulary
enVocab = 'a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z'.split(",")
hnVocab = [chr(c) for c in range(2304, 2432)]

# Creating dictionaries for each vocabulary and assigning a index to each letter
# Format = {Letter: Index}
pad = '-p-' # Index 0 will be used as a pad
enVocabDict = {pad: 0}
hnVocabDict = {pad: 0}

# English vocab dictionary
for index, char in enumerate(enVocab):
    enVocabDict[char] = index+1

# Hindi vocab dictionary 
for index, char in enumerate(hnVocab):
    hnVocabDict[char] = index+1
    
print("English Vocab:",enVocabDict)
print("\nHindi Vocab:",hnVocabDict)

English Vocab: {'-p-': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26}

Hindi Vocab: {'-p-': 0, 'ऀ': 1, 'ँ': 2, 'ं': 3, 'ः': 4, 'ऄ': 5, 'अ': 6, 'आ': 7, 'इ': 8, 'ई': 9, 'उ': 10, 'ऊ': 11, 'ऋ': 12, 'ऌ': 13, 'ऍ': 14, 'ऎ': 15, 'ए': 16, 'ऐ': 17, 'ऑ': 18, 'ऒ': 19, 'ओ': 20, 'औ': 21, 'क': 22, 'ख': 23, 'ग': 24, 'घ': 25, 'ङ': 26, 'च': 27, 'छ': 28, 'ज': 29, 'झ': 30, 'ञ': 31, 'ट': 32, 'ठ': 33, 'ड': 34, 'ढ': 35, 'ण': 36, 'त': 37, 'थ': 38, 'द': 39, 'ध': 40, 'न': 41, 'ऩ': 42, 'प': 43, 'फ': 44, 'ब': 45, 'भ': 46, 'म': 47, 'य': 48, 'र': 49, 'ऱ': 50, 'ल': 51, 'ळ': 52, 'ऴ': 53, 'व': 54, 'श': 55, 'ष': 56, 'स': 57, 'ह': 58, 'ऺ': 59, 'ऻ': 60, '़': 61, 'ऽ': 62, 'ा': 63, 'ि': 64, 'ी': 65, 'ु': 66, 'ू': 67, 'ृ': 68, 'ॄ': 69, 'ॅ': 70, 'ॆ': 71, 'े': 72, 'ै': 73, 'ॉ': 74, 'ॊ': 75, 'ो': 76, 'ौ': 77, '्': 78, 'ॎ': 79, 'ॏ': 80, 'ॐ': 81, '॑':

### Encoding Data

In [90]:
def wordEncode(word, letterIndex, device = 'cpu'):
    '''wordEncode takes a word and encodes it 
    using the vocabulary dictionary into a encoded sequence'''
    wordenc = torch.zeros(len(word)+1, 1, len(letterIndex)).to(device)
    
    for index, letter in enumerate(word):
        position = letterIndex[letter]
        wordenc[index][0][position] = 1
    padPosition = letterIndex[pad]
    wordenc[index+1][0][padPosition] = 1
    return wordenc
    
def getEnc(word, letterIndex, device='cpu'):
    '''getEnc takes a word and the vocabulary dictionary and 
    returns a list of indexes for each letter in the vocab'''
    getenc = torch.zeros([len(word)+1, 1], dtype=torch.long).to(device)
    
    for index, letter in enumerate(word):
        position = letterIndex[letter]
        getenc[index][0] = position 
    getenc[index+1][0] = letterIndex[pad]
    return getenc  

# Max characters for the output
MaxOutChars = 20

### Data Loading

In [91]:
class DataLoader(Dataset):
    '''Utility class to load the dataset'''
    
    def __init__(self, filename):
        self.enWords, self.hnWords = self.getEH(filename)
        
        # Shuffle indices to get random samples
        self.shuffleIndices = list(range(len(self.enWords)))
        random.shuffle(self.shuffleIndices)
        self.shuffleStart = 0
        
    def __getitem__(self, index):
        '''Returns a sample from the data with given index'''
        return self.enWords[index], self.hnWords[index]
    
    def __len__(self):
        '''Returns the length of dataset'''
        return len(self.hnWords)
    
    def getEH(self,filename):
        '''Returns the source(X) and target(y) arrays for the given dataset '''
        enWords = []
        hnWords = []
        with open(filename, 'r', encoding='utf-8') as f:
            for d in f:
                enWords.append(d.split(" ")[1].strip("\n"))
                hnWords.append(d.split(" ")[0])

        return enWords, hnWords

    def getRandomSample(self):
        '''Returns a Random Sample from the dataset'''
        index = np.random.randint(len(self.enWords))
        return self.__getitem__(index)
    
    def getBatchSolo(self, batchSize, fromSet):
        '''Returns a batch of each(source, target) from data for training'''
        shuffleEnd = self.shuffleStart + batchSize
        batch = []
        if shuffleEnd>=len(self.enWords):
            batch = [fromSet[i] for i in self.shuffleIndices[0:shuffleEnd%len(self.enWords)]]
            end = len(self.enWords)
            
        return batch + [fromSet[i] for i in self.shuffleIndices[self.shuffleStart:shuffleEnd]]
    
    def getBatch(self, batchSize):
        '''Returns a combined batch of words for training'''
        enBatch = self.getBatchSolo(batchSize, self.enWords)
        hnBatch = self.getBatchSolo(batchSize, self.hnWords)
        self.shuffleStart += (batchSize+1)
        
        # Shuffle data after every iteration of training
        if self.shuffleStart >= len(self.enWords):
            random.shuffle(self.shuffleIndices)
            self.shuffleStart = 0
            
        return enBatch, hnBatch
            

### Loading and Splitting data

In [92]:
# splitTxt takes the filename and desired trainingDataSize as input,
# and splits the data into training, test and validation(15% of trainData) sets.
# Default trainingSize = 80
dl.splitTxt('datasetBig.txt')

trainDataL = DataLoader('train.txt')
testDataL = DataLoader('test.txt')
valDataL = DataLoader('val.txt')

Data splitted into: Train: 80  Test: 20
Size of training data: 30736
Size of validation data: 5424
Size of test data: 9041


### Example Encoded Data

In [93]:
print("English Word:", trainDataL[0][0],
      "\n\nEncoded Word:\n", wordEncode(trainDataL[0][1], hnVocabDict),
      "\n\nIndexes from Vocab:\n", getEnc(trainDataL[0][1], hnVocabDict))


print("\nHindi Word:", trainDataL[0][1],
      "\n\nEncoded Word:\n",wordEncode(trainDataL[0][0].lower(), enVocabDict),
      "\n\nIndexes from Vocab:\n", getEnc(trainDataL[0][0].lower(), enVocabDict))

English Word: anuttam 

Encoded Word:
 tensor([[[0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.]],

        ...,

        [[0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.]],

        [[1., 0., 0.,  ..., 0., 0., 0.]]]) 

Indexes from Vocab:
 tensor([[ 6],
        [41],
        [66],
        [37],
        [78],
        [37],
        [47],
        [ 0]])

Hindi Word: अनुत्तम 

Encoded Word:
 tensor([[[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 1., 0., 0., 0., 0., 0.

### Seq2Seq using Recurrent Neural Networks

In [318]:
class rnnEncoderDecoder(nn.Module):
    '''A encoder-decoder based RNN Seq2Seq model'''
    def __init__(self, inputSize, hiddenSize, outputSize, verbose=False):
        
        super(rnnEncoderDecoder, self).__init__()

        self.hiddenSize = hiddenSize
        self.outputSize = outputSize
        
        self.encoder = nn.GRU(inputSize, hiddenSize)
        self.decoder = nn.GRU(outputSize, hiddenSize)
        
        self.hidden2out = nn.Linear(hiddenSize, outputSize)
        self.softmax = nn.LogSoftmax(dim=2)
        
        self.verbose = verbose
        
    def forward(self, input_, maxOutChars=MaxOutChars, device='cpu', groundTruth = None):
        
        ###### Encoder #########
        encOut, hiddenOut = self.encoder(input_)
        
        if self.verbose:
            print("Encoder Input:", input_.shape)
            print("Encoder hidden output:", hiddenOut.shape)
            print("Encoder Output:", encOut.shape)
            
        ###### Decoder ##########
        decState = hiddenOut
        decInput = torch.zeros(1, 1, self.outputSize).to(device)
        
        if self.verbose:
            print("Decoder State:", decState.shape)
            print("Decoder Input:", decInput.shape)
        
        outputF = []
            
        for i in range(maxOutChars):
            
            output, decoder = self.decoder(decInput, decState)
            
            if self.verbose:
                print("Decoder Intermediate Output:", output.shape)
                
            output = self.hidden2out(decState)
            output = self.softmax(output)
            outputF.append(output.view(1, -1))
            
            if self.verbose:
                print("Decoder Output:", output.shape)
                self.verbose = False
                
            maxIndex = torch.argmax(output, 2, keepdim=True)
            if not groundTruth is None:
                maxIndex = groundTruth[i].reshape(1,1,1)
            newInput = torch.zeros(output.shape, device=device)
            newInput.scatter_(2, maxIndex, 1)
            
            decInput = newInput.detach()
            
        return outputF
            

### Seq2Seq using Recurrent Neural Networks with Attention

In [95]:
class rnnAttentionEncoderDecoder(nn.Module):
    '''A encoder-decoder based RNN Seq2Seq model with Attention'''
    def __init__(self, inputSize,hiddenSize,outputSize,verbose=False):
        
        super(rnnAttentionEncoderDecoder,self).__init__()
        
        self.hiddenSize = hiddenSize
        self.outputSize = outputSize
        
        self.encoder = nn.GRU(inputSize, hiddenSize)
        self.decoder = nn.GRU(hiddenSize*2, hiddenSize)
        
        self.hidden2out = nn.Linear(hiddenSize, outputSize)
        self.softmax = nn.LogSoftmax(dim=2)
        
        self.U = nn.Linear(self.hiddenSize, self.hiddenSize)
        self.W = nn.Linear(self.hiddenSize, self.hiddenSize)
        
        self.attention = nn.Linear(hiddenSize,1) 
        self.out2hidden = nn.Linear(self.outputSize, self.hiddenSize)
        
        self.verbose = verbose
        
    def forward(self, input_, maxOutChars=MaxOutChars, device='cpu', groundTruth=None):
        
        ########## encoder ####################
        encOut, hidden = self.encoder(input_)
        
        if self.verbose:
            print('Encoder output',encOut.shape)
        
        encOut = encOut.view(-1, self.hiddenSize)
            
        ########### decoder ###################
        decState = hidden
        decInput = torch.zeros(1, 1, self.outputSize).to(device)
        
        outputs=[]
        U = self.U(encOut)
        
        if self.verbose:
            print('Decoder state', decState.shape)
            print('Decoder input', decInput.shape)
            print('U', U.shape)
            
        for i in range(maxOutChars):
            
            W = self.W(decState.view(1,-1).repeat(encOut.shape[0],1))
            V = self.attention(torch.tanh(U + W))
            
            attentionWeights = fun.softmax(V.view(1,-1),dim=1)
                
            attentionApplied = torch.bmm(attentionWeights.unsqueeze(0), encOut.unsqueeze(0))
            
            embedding = self.out2hidden(decInput)
            
            decInput = torch.cat((embedding[0], attentionApplied[0]), 1).unsqueeze(0)

            out, decState = self.decoder(decInput, decState)
            
            if self.verbose:
                print('W',W.shape)
                print('V',V.shape)
                print('Attention Weights',attentionWeights.shape)
                print('Encoder output',encOut.shape)
                print('Attention applied',attentionApplied.shape)
                print('Decoder input',decInput.shape)
                print('Decoder intermediate output',out.shape)
                
            out = self.hidden2out(decState)
            out = self.softmax(out)
            outputs.append(out.view(1,-1))
            
            if self.verbose:
                print('Decoder Output', out.shape)
                self.verbose=False
                
            maxIndex = torch.argmax(out,2,keepdim=True)
            
            if not groundTruth is None:
                maxIndex = groundTruth[i].reshape(1,1,1)
                
            newInput = torch.zeros(out.shape, device=device)
            newInput.scatter_(2, maxIndex, 1)
            
            decInput = newInput.detach()
            
        return outputs

### Prediction

In [96]:
def getPrediction(model, word, maxOutChars, device='cpu'):
    '''Predicts the transliterated word for given word'''
    model.eval().to(device)
    sourceEnc = wordEncode(word, hnVocabDict)
    prediction = model(sourceEnc, maxOutChars)
    return prediction

### Sample Runs for both models

In [97]:
model = rnnEncoderDecoder(len(hnVocabDict),256,len(enVocabDict),verbose=True)
prediction = getPrediction(model,trainDataL[0][1],20)

Encoder Input: torch.Size([8, 1, 129])
Encoder hidden output: torch.Size([1, 1, 256])
Encoder Output: torch.Size([8, 1, 256])
Decoder State: torch.Size([1, 1, 256])
Decoder Input: torch.Size([1, 1, 27])
Decoder Intermediate Output: torch.Size([1, 1, 256])
Decoder Output: torch.Size([1, 1, 27])


In [98]:
model = rnnAttentionEncoderDecoder(len(hnVocabDict),256,len(enVocabDict),verbose=True)
prediction = getPrediction(model,trainDataL[0][1],20)

Encoder output torch.Size([8, 1, 256])
Decoder state torch.Size([1, 1, 256])
Decoder input torch.Size([1, 1, 27])
U torch.Size([8, 256])
W torch.Size([8, 256])
V torch.Size([8, 1])
Attention Weights torch.Size([1, 8])
Encoder output torch.Size([8, 256])
Attention applied torch.Size([1, 1, 256])
Decoder input torch.Size([1, 1, 512])
Decoder intermediate output torch.Size([1, 1, 256])
Decoder Output torch.Size([1, 1, 27])


### Training

In [99]:
def batchTrain(model, optimizer, criterion, batchSize, device='cpu', teacherForce = False):
    '''Trains the model on one batch of data'''
    
    model.train().to(device)
    optimizer.zero_grad()
    
    # get a batch of batchSize from the data
    enBatch, hnBatch = trainDataL.getBatch(batchSize)
    
    # Calculate the loss for the run on a single batch
    totalLoss = 0
    for i in range(batchSize):
        
        source = wordEncode(hnBatch[i], hnVocabDict, device)
        target = getEnc(enBatch[i], enVocabDict, device)
        outputs = model(source, target.shape[0], device, groundTruth = target if teacherForce else None)
        
        for index, output in enumerate(outputs):
            loss = criterion(output, target[index])/batchSize
            loss.backward(retain_graph = True)
            totalLoss+=loss
    
    # One step of optimizer
    optimizer.step()
    
    # Return the loss for the current batch
    return totalLoss/batchSize

In [326]:
def runBatchTrain(model, lr=0.1, numBatches=100, batchSize=10, displayFreq=10, device='cpu'):
    '''Trains the data on specified number of batches and batchSize'''
    
    model = model.to(device)
    
    # loss criterion
    criterion=nn.NLLLoss(ignore_index=-1)
    
    # optimizer
    optimizer = opt.Adam(model.parameters(),lr=lr)
    
    # teacherForce ratio
    teacherForceRatio = numBatches//3
    
    # loss array
    lossArr = np.zeros(numBatches+2)
    
    # Train the model for numBatches(epochs)
    for i in range(numBatches+1):
        
        lossArr[i+1] = (lossArr[i]*i+batchTrain(model,
                                              optimizer,
                                              criterion,
                                              batchSize,
                                              device=device, 
                                              teacherForce=i<teacherForceRatio))/(i+1)
        
        # display the loss according to the displayFreq
        if i%displayFreq==0 and i!=0:
            print('Iteration ',i,' Loss',lossArr[i])
    
    # Save the model
    torch.save(model,'model.pt')  
    
    # Return the total training loss
    return lossArr

In [220]:
# Training the RNN Seq2Seq model

start = datetime.now()
model = rnnEncoderDecoder(len(hnVocabDict),512,len(enVocabDict))
lossHistory = runBatchTrain(model, lr=0.01, numBatches=1000, batchSize=256, displayFreq=10, device=gpu)
end = datetime.now()

trainTimeRnn = end - start

Iteration  10  Loss 0.10385165363550186
Iteration  20  Loss 0.0951119065284729
Iteration  30  Loss 0.0907028540968895
Iteration  40  Loss 0.08697733283042908
Iteration  50  Loss 0.0839361771941185
Iteration  60  Loss 0.08151286095380783
Iteration  70  Loss 0.07939902693033218
Iteration  80  Loss 0.07770224660634995
Iteration  90  Loss 0.07626838237047195
Iteration  100  Loss 0.07506533712148666
Iteration  110  Loss 0.0739450454711914
Iteration  120  Loss 0.07313119620084763
Iteration  130  Loss 0.07230781763792038
Iteration  140  Loss 0.07161548733711243
Iteration  150  Loss 0.07104188203811646
Iteration  160  Loss 0.07052134722471237
Iteration  170  Loss 0.07002943754196167
Iteration  180  Loss 0.06962256878614426
Iteration  190  Loss 0.06922026723623276
Iteration  200  Loss 0.06887208670377731
Iteration  210  Loss 0.06854281574487686
Iteration  220  Loss 0.06817112863063812
Iteration  230  Loss 0.06787997484207153
Iteration  240  Loss 0.06763587892055511
Iteration  250  Loss 0.067413

In [219]:
# Training the Attention RNN Seq2Seq model

start = datetime.now()
modelAtt = rnnAttentionEncoderDecoder(len(hnVocabDict),512,len(enVocabDict))
lossHistory = runBatchTrain(modelAtt, lr=0.01, numBatches=1000, batchSize=256, displayFreq=10, device=gpu)
end = datetime.now()

trainTimeAttRnn = end - start

Iteration  10  Loss 0.10054649412631989
Iteration  20  Loss 0.08827856928110123
Iteration  30  Loss 0.0818902924656868
Iteration  40  Loss 0.07753408700227737
Iteration  50  Loss 0.07372581213712692
Iteration  60  Loss 0.06974831968545914
Iteration  70  Loss 0.06581412255764008
Iteration  80  Loss 0.06177011877298355
Iteration  90  Loss 0.0577525794506073
Iteration  100  Loss 0.053879402577877045
Iteration  110  Loss 0.05037663131952286
Iteration  120  Loss 0.047205567359924316
Iteration  130  Loss 0.04439013451337814
Iteration  140  Loss 0.04191342368721962
Iteration  150  Loss 0.03980052471160889
Iteration  160  Loss 0.03787409886717796
Iteration  170  Loss 0.036167118698358536
Iteration  180  Loss 0.034679729491472244
Iteration  190  Loss 0.033341508358716965
Iteration  200  Loss 0.03211692348122597
Iteration  210  Loss 0.031018635258078575
Iteration  220  Loss 0.02999475970864296
Iteration  230  Loss 0.029045648872852325
Iteration  240  Loss 0.028170866891741753
Iteration  250  Los

### Evaluation

In [232]:
def predict(data, model):
    '''Gives out predictions on whole data with specified model'''
    predY = []
    for i in range(len(data)):
        eng, hindi = data[i]
        pred = getPrediction(model, hindi, len(eng))
        en = []
        for index, p in enumerate(pred):
            value, indices = p.topk(1)
            en.append(enVocab[indices.tolist()[0][0] - 1])
        predY.append("".join(en))
    return predY

In [225]:
from sklearn_crfsuite import metrics

def elem_accuracy(y_hat, y_gold):
    '''Evaluation measure for predictions of a y-sequence, returns average position-wise matches.'''
    if len(y_hat) != len(y_gold):
        raise ValueError(len(y_hat),' != ',len(y_gold))
    matches = np.sum([y_hat[i]==y_gold[i] for i in range(len(y_gold))])
    return 1.0 * matches / len(y_gold)

In [233]:
# Make predictions on train/val/test sets using both the NN models

RnnpredYT = predict(trainDataL, model)
RnnpredYV = predict(valDataL, model)
RnnpredY = predict(testDataL, model)

AttRnnpredYT = predict(trainDataL, modelAtt)
AttRnnpredYV = predict(valDataL, modelAtt)
AttRnnpredY = predict(testDataL, modelAtt)

In [249]:
print("Training time for RNN-Seq2Seq:", trainTimeRnn)
print("Training time for Attention-RNN-Seq2Seq:", trainTimeAttRnn)

trainAccRnn = np.average([elem_accuracy(RnnpredYT[i], trainDataL[i][0]) for i in range(len(trainDataL))])
trainAccRnnAtt = np.average([elem_accuracy(AttRnnpredYT[i], trainDataL[i][0]) for i in range(len(trainDataL))])
print("\nTrain Accuracy:\nRNNSeq2Seq:", trainAccRnn, "\nAttentionRNNSeq2Seq:", trainAccRnnAtt)
print("\nTrain F1 score:\nRNNSeq2Seq:", metrics.flat_f1_score(RnnpredYT, trainDataL[:][0], average='weighted'),
     "\nAttRNNSeq2Seq:", metrics.flat_f1_score(AttRnnpredYT, trainDataL[:][0], average='weighted'))

valAccRnn = np.average([elem_accuracy(RnnpredYV[i], valDataL[i][0]) for i in range(len(valDataL))])
valAccRnnAtt = np.average([elem_accuracy(AttRnnpredYV[i], valDataL[i][0]) for i in range(len(valDataL))])
print("\nValidation Accuracy:\nRNNSeq2Seq:", valAccRnn, "\nAttentionRNNSeq2Seq:", valAccRnnAtt)
print("\nValidation F1 score:\nRNNSeq2Seq:", metrics.flat_f1_score(RnnpredYV, valDataL[:][0], average='weighted'),
     "\nAttRNNSeq2Seq:", metrics.flat_f1_score(AttRnnpredYV, valDataL[:][0], average='weighted'))

testAccRnn = np.average([elem_accuracy(RnnpredY[i], testDataL[i][0]) for i in range(len(testDataL))])
testAccRnnAtt = np.average([elem_accuracy(AttRnnpredY[i], testDataL[i][0]) for i in range(len(testDataL))])
print("\nTest Accuracy:\nRNNSeq2Seq:", testAccRnn, "\nAttentionRNNSeq2Seq:", testAccRnnAtt)
print("\nTest F1 score:\nRNNSeq2Seq:", metrics.flat_f1_score(RnnpredY, testDataL[:][0], average='weighted'),
     "\nAttRNNSeq2Seq:", metrics.flat_f1_score(AttRnnpredY, testDataL[:][0], average='weighted'))

Training time for RNN-Seq2Seq: 1:20:31.786652
Training time for Attention-RNN-Seq2Seq: 6:33:56.326687

Train Accuracy:
RNNSeq2Seq: 0.2685235098496182 
AttentionRNNSeq2Seq: 0.5670080225270107

Train F1 score:
RNNSeq2Seq: 0.3027218635348887 
AttRNNSeq2Seq: 0.5146689729493704

Validation Accuracy:
RNNSeq2Seq: 0.2619285840040818 
AttentionRNNSeq2Seq: 0.5646673559560728

Validation F1 score:
RNNSeq2Seq: 0.2944087980021897 
AttRNNSeq2Seq: 0.5135938082294154

Test Accuracy:
RNNSeq2Seq: 0.2606880289091068 
AttentionRNNSeq2Seq: 0.5669303035306439

Test F1 score:
RNNSeq2Seq: 0.2922726427816583 
AttRNNSeq2Seq: 0.5145139506703765


In [None]:
print("Model parameters for RNN Seq2Seq Model:", sum([param.nelement() for param in model.parameters()]))
print("Model parameters for Attetnion RNN Seq2Seq Model:", sum([param.nelement() for param in modelAtt.parameters()]))