In [None]:
import torch
import torchvision
import os
from torch.utils.data import Dataset,DataLoader
import cv2
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import math
import random

In [None]:
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from torchtext.datasets import multi30k, Multi30k
from typing import Iterable, List
from torch.nn.utils.rnn import pad_sequence

In [None]:
from torch import Tensor
import torch.nn as nn
from torch.nn import Transformer

In [None]:
import torchvision.transforms as T
from PIL import Image

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(DEVICE)

## Preparation of Dataset

In [None]:
class MyDataset(Dataset):
    """Line-image dataset yielding ``(image, label_ids, file_name)`` triples.

    Args:
        Train: sequence of ``(image_file_name, transcription)`` entries.
        address: prefix that is string-concatenated with each file name to
            form the image path (so it must end with a path separator).

    All transcriptions are encoded with ``text_transform`` once at
    construction time and padded with ``PAD_IDX`` to a common length, so
    ``self.labels`` is a single tensor indexed as ``labels[:, sample]``.
    """

    # Fixed size of every returned image array: (MAX_LEN, IMG_HEIGHT).
    MAX_LEN = 1600
    IMG_HEIGHT = 64

    def __init__(self,Train,address):
        self.address = address
        self.Train = Train
        encoded = [text_transform(sample[1]) for sample in Train]
        self.labels = pad_sequence(encoded, padding_value=PAD_IDX)

    def __getitem__(self,index):
        """Load, rotate, resize and zero-pad one image; return it with its label.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        file_name = self.Train[index][0]
        path = self.address + file_name
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise FileNotFoundError("Could not read image file: {}".format(path))

        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = np.rot90(img)
        # cv2.resize takes dsize as (width, height): force the second array
        # axis to IMG_HEIGHT while keeping the first axis length unchanged.
        img = cv2.resize(img,(self.IMG_HEIGHT,img.shape[0]),interpolation = cv2.INTER_AREA)
        if img.shape[0] > self.MAX_LEN:
            # Too long: squeeze down to MAX_LEN rows.
            img = cv2.resize(img,(self.IMG_HEIGHT,self.MAX_LEN),interpolation = cv2.INTER_AREA)
        if img.shape[0] < self.MAX_LEN:
            # Too short: copy into the leading rows of a zero canvas.
            padded_array = np.zeros((self.MAX_LEN,self.IMG_HEIGHT))
            shape = np.shape(img)
            padded_array[:shape[0],-shape[1]:] = img
            img = padded_array

        return torch.from_numpy(img).double(), self.labels[:,index], file_name

    def __len__(self):
        return len(self.Train)

Please provide the paths of the train and test files that list the images and their labels. See the reference Train and Test .txt files for the expected format.

In [None]:
# Each line of a split file is split on a tab into a tuple; MyDataset reads
# field 0 as the image file name and field 1 as the transcription.
# `with` guarantees the handles are closed even if reading fails.
with open('/home/tukl/data/HWR/Final_implementation/upti-2/train.txt','r') as f:
    lines = f.read().split('\n')
Train = [tuple(line.split('\t')) for line in lines]

with open('/home/tukl/data/HWR/Final_implementation/upti-2/test.txt','r') as f3:
    lines = f3.read().split('\n')
Test = [tuple(line.split('\t')) for line in lines]

# Prefix that is concatenated with each image file name (note the trailing '/').
address = "/home/tukl/data/HWR/Final_implementation/upti-2/"

In [None]:
# Remove trailing entries that have no label field, e.g. the ('',) tuple that
# splitting on '\n' leaves behind when the file ends with a newline.
# The check makes this cell idempotent: re-running it (or loading a file that
# has no final newline) no longer discards a real sample.
while Train and len(Train[-1]) < 2:
    Train.pop()
while Test and len(Test[-1]) < 2:
    Test.pop()

Checking Train and Test number of samples

In [None]:
# Number of samples in each split: train on the first line, test on the second.
print(len(Train), len(Test), sep='\n')

## Vocabulary preparation from Train Data

In [None]:
# Character-level corpus: one list of characters per training transcription.
ForVocab = [list(sample[1]) for sample in Train]

# Ids reserved for the special tokens; they must line up with the order of
# `special_symbols` so that each token lands on its id in the vocabulary.
UNK_IDX, PAD_IDX, BOS_IDX, EOS_IDX = range(4)
special_symbols = ['<unk>', '<pad>', '<bos>', '<eos>']

vocab_transform = build_vocab_from_iterator(
    ForVocab,
    min_freq=1,
    specials=special_symbols,
    special_first=True,
)
# Characters that are not in the vocabulary are looked up as <unk>.
vocab_transform.set_default_index(UNK_IDX)

### Defining transforms to be used

In [None]:
def sequential_transforms(*transforms):
    """Chain ``transforms`` into one callable applied left to right.

    The returned function feeds its argument to the first transform, passes
    each result on to the next one, and returns the final value. With no
    transforms it returns its input unchanged.
    """
    def func(txt_input):
        result = txt_input
        for step in transforms:
            result = step(result)
        return result
    return func

def token_transform(line:str):
    """Tokenize ``line`` at character level: one list item per character."""
    return list(line)
def tensor_transform(token_ids: List[int]):
    """Wrap ``token_ids`` with BOS/EOS and return them as one 1-D long tensor.

    The id tensor is created with an explicit ``long`` dtype: for an empty
    transcription ``torch.tensor([])`` would otherwise default to float32 and
    the concatenation would mix dtypes.
    """
    return torch.cat((torch.tensor([BOS_IDX]),
                      torch.tensor(token_ids, dtype=torch.long),
                      torch.tensor([EOS_IDX])))

# Full label pipeline, applied in this order:
#   1. token_transform  - split the transcription into characters
#   2. vocab_transform  - map characters to vocabulary ids
#   3. tensor_transform - add BOS/EOS and convert to a tensor
text_transform = sequential_transforms(
    token_transform,
    vocab_transform,
    tensor_transform,
)

### Defining Data sets to be used

In [None]:
# One dataset object per split; both read images relative to `address`.
TrData = MyDataset(Train=Train, address=address)
# VlData = MyDataset(Val,address)
TestData = MyDataset(Train=Test, address=address)

### Preparation of Data Loaders

In [None]:
batch_size = 8
num_workers = 0

# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(TrData, batch_size=batch_size,
     num_workers=num_workers, shuffle = True)

# val_loader = torch.utils.data.DataLoader(VlData, batch_size=batch_size,
#      num_workers=num_workers, shuffle = True)

# Evaluation data is NOT shuffled: the validation loss is averaged per batch,
# so a changing batch composition makes it vary between runs of the same
# model, and it also randomizes the row order of the prediction CSV files.
test_loader= torch.utils.data.DataLoader(TestData, batch_size=batch_size,
     num_workers=num_workers, shuffle = False)

#### Defining Token Embedding and Positional Encoding used for Transformer

In [None]:
class TokenEmbedding(nn.Module):
    """Embedding lookup whose output is scaled by ``sqrt(emb_size)``."""

    def __init__(self, vocab_size: int, emb_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_size)
        self.emb_size = emb_size

    def forward(self, tokens: Tensor):
        # Indices are cast to long first, so any numeric dtype is accepted.
        scale = math.sqrt(self.emb_size)
        looked_up = self.embedding(tokens.long())
        return looked_up * scale
    
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position codes to a sequence-first input, then dropout.

    The table is stored as the non-trainable buffer ``pos_embedding`` with
    shape ``(maxlen, 1, emb_size)``; even columns hold sines and odd columns
    cosines.
    """

    def __init__(self,
                 emb_size: int,
                 dropout: float,
                 maxlen: int = 5000):
        super().__init__()
        inv_freq = torch.exp(- torch.arange(0, emb_size, 2) * math.log(10000) / emb_size)
        positions = torch.arange(maxlen).unsqueeze(1)
        angles = positions * inv_freq

        table = torch.zeros(maxlen, emb_size)
        table[:, 0::2] = torch.sin(angles)
        table[:, 1::2] = torch.cos(angles)

        self.dropout = nn.Dropout(dropout)
        # The singleton middle axis broadcasts over dimension 1 of the input.
        self.register_buffer('pos_embedding', table.unsqueeze(1))

    def forward(self, token_embedding):
        seq_len = token_embedding.size(0)
        return self.dropout(token_embedding + self.pos_embedding[:seq_len])

### Functions for the maskings used

In [None]:
def generate_square_subsequent_mask(sz):
    """Return a ``(sz, sz)`` float mask on ``DEVICE`` for causal attention.

    Entries on and below the main diagonal are ``0.0``; entries strictly above
    it are ``-inf``.
    """
    blocked = torch.full((sz, sz), float('-inf'), device=DEVICE)
    # triu(diagonal=1) keeps the -inf values above the diagonal and zeroes the rest.
    return torch.triu(blocked, diagonal=1)


def create_mask(src, tgt):
    """Build the attention and padding masks for a (src, tgt) pair.

    Returns, in order:
        src_mask: all-False bool matrix of size ``src.shape[0]`` squared.
        tgt_mask: causal float mask of size ``tgt.shape[0]`` squared.
        src_padding_mask: ``src == PAD_IDX`` with its first two axes swapped.
        tgt_padding_mask: ``tgt == PAD_IDX`` with its first two axes swapped.
    """
    src_len, tgt_len = src.shape[0], tgt.shape[0]

    tgt_mask = generate_square_subsequent_mask(tgt_len)
    src_mask = torch.zeros((src_len, src_len), device=DEVICE).type(torch.bool)

    src_is_pad = src == PAD_IDX
    tgt_is_pad = tgt == PAD_IDX
    return src_mask, tgt_mask, src_is_pad.transpose(0, 1), tgt_is_pad.transpose(0, 1)

## Architecture Proposed in the paper

In [None]:
class ConvTrans(nn.Module):
    """CNN feature extractor feeding a Transformer encoder-decoder.

    A stack of seven convolution blocks turns an image into a sequence of
    ``emb_size``-dimensional feature vectors, which is used as the source
    sequence of an ``nn.Transformer``; the decoder consumes target token ids
    and ``generator`` projects its output to vocabulary logits.

    Submodule names and their registration order are kept stable on purpose:
    the names are the ``state_dict`` keys of saved checkpoints, and the order
    fixes the parameter order seen by the initialisation loop.

    Args:
        num_encoder_layers: number of Transformer encoder layers.
        num_decoder_layers: number of Transformer decoder layers.
        emb_size: feature / embedding width (Transformer ``d_model``).
        nhead: number of attention heads.
        tgt_vocab_size: size of the output vocabulary.
        dim_feedforward: hidden width of the Transformer feed-forward layers.
        dropout: dropout used by the Transformer and the positional encoding.
    """

    def __init__(self,
                 num_encoder_layers: int,
                 num_decoder_layers: int,
                 emb_size: int,
                 nhead: int,
                 tgt_vocab_size: int,
                 dim_feedforward: int = 512,
                 dropout: float = 0.1):
        super(ConvTrans, self).__init__()
        self.emb_size = emb_size

        self.positional_encoding = PositionalEncoding(
            emb_size, dropout=dropout)
        self.tgt_tok_emb = TokenEmbedding(tgt_vocab_size, emb_size)

        self.conv1 = self._conv_block(1, 16, pool=2)
        self.conv2 = self._conv_block(16, 32, pool=2)
        self.conv3 = self._conv_block(32, 48)
        self.conv4 = self._conv_block(48, 64, pool=(1, 2), dropout=0.2)
        self.conv5 = self._conv_block(64, 96)
        self.conv6 = self._conv_block(96, 128, pool=(1, 2), dropout=0.2)
        # Un-padded 4x4 kernel: shrinks the last two axes by 3 each.
        self.conv7 = self._conv_block(128, emb_size, kernel_size=4, padding=0)

        self.transformer = Transformer(d_model=emb_size,
                                       nhead=nhead,
                                       num_encoder_layers=num_encoder_layers,
                                       num_decoder_layers=num_decoder_layers,
                                       dim_feedforward=dim_feedforward,
                                       dropout=dropout)

        self.generator = nn.Linear(emb_size, tgt_vocab_size)

    @staticmethod
    def _conv_block(in_channels, out_channels, kernel_size=3, padding=1,
                    pool=None, dropout=None):
        """Conv2d -> BatchNorm2d -> LeakyReLU, optionally followed by MaxPool2d and Dropout2d."""
        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(),
        ]
        if pool is not None:
            layers.append(nn.MaxPool2d(pool))
        if dropout is not None:
            layers.append(nn.Dropout2d(dropout))
        return nn.Sequential(*layers)

    def _extract_features(self, x: Tensor):
        """Run the conv stack on a 4-D image batch and return sequence-first features.

        The trailing axis is squeezed after ``conv7`` and the result is
        rearranged from ``(batch, emb_size, seq)`` to ``(seq, batch, emb_size)``.
        """
        for block in (self.conv1, self.conv2, self.conv3, self.conv4,
                      self.conv5, self.conv6, self.conv7):
            x = block(x)
        x = x.squeeze(-1)
        x = torch.transpose(x,1,2)
        x = torch.transpose(x,0,1)
        return x

    def forward(self,
                x: Tensor,
                trg: Tensor):
        """Return vocabulary logits for teacher-forced decoding.

        Args:
            x: image batch without a channel axis (one is inserted at dim 1).
            trg: batch-first target token ids; transposed to sequence-first here.

        Returns:
            Sequence-first logits, last axis of size ``tgt_vocab_size``.
        """
        src = self._extract_features(torch.unsqueeze(x,1))
        trg = torch.transpose(trg,0,1)

        # Only the causal target mask and the target padding mask are used;
        # the source is attended to without any mask.
        _, tgt_mask, _, tgt_padding_mask = create_mask(src, trg)

        src_emb = self.positional_encoding(src)
        tgt_emb = self.positional_encoding(self.tgt_tok_emb(trg))

        outs = self.transformer(src_emb, tgt_emb,
                                tgt_mask=tgt_mask,
                                tgt_key_padding_mask=tgt_padding_mask)

        return self.generator(outs)

    def encode(self, src: Tensor):
        """Encode an image batch that already has its channel axis into Transformer memory."""
        features = self._extract_features(src)
        return self.transformer.encoder(self.positional_encoding(features))

    def decode(self, tgt: Tensor, memory: Tensor, tgt_mask: Tensor):
        """Run the Transformer decoder on batch-first ``tgt`` ids against ``memory``.

        Returns the sequence-first decoder output (before ``generator``).
        """
        tgt = torch.transpose(tgt,0,1)

        return self.transformer.decoder(self.positional_encoding(
                          self.tgt_tok_emb(tgt)), memory,
                          tgt_mask)

    def sampleFunc(self,x):
        """Return the conv features of a single 2-D image as ``(seq, 1, emb_size)``.

        The features are rearranged with the same transposes as ``forward``.
        The previous ``view(-1, batch, emb_size)`` produced the same shape but
        reinterpreted the memory layout, mixing the channel and sequence axes.
        """
        x = torch.unsqueeze(x,0)
        x = torch.unsqueeze(x,0)
        return self._extract_features(x)
    
# model = ConvTrans()
# model = model.double()

### Defining Model Hyperparameters, Optimizer Function, Loss Function etc.

In [None]:
# Seed before building the model so the initial weights are reproducible.
torch.manual_seed(0)

TGT_VOCAB_SIZE = len(vocab_transform)
EMB_SIZE = 256
NHEAD = 8
FFN_HID_DIM = 512
BATCH_SIZE = 16
NUM_ENCODER_LAYERS = 3
NUM_DECODER_LAYERS = 3

transformer = ConvTrans(NUM_ENCODER_LAYERS, NUM_DECODER_LAYERS, EMB_SIZE,
                             NHEAD, TGT_VOCAB_SIZE, FFN_HID_DIM)

# The dataset returns float64 images, so the weights are converted to match.
transformer = transformer.double()

# Xavier-initialise every weight tensor with more than one dimension.
for p in transformer.parameters():
    if p.dim() > 1:
        nn.init.xavier_uniform_(p)

transformer = transformer.to(DEVICE)

# <pad> targets contribute nothing to the loss.
loss_fn = torch.nn.CrossEntropyLoss(ignore_index=PAD_IDX)

optimizer = torch.optim.Adam(transformer.parameters(), lr=0.0003, betas=(0.9, 0.98), eps=1e-9)

In [None]:
# Print the model's module tree as a quick check of the architecture.
print(transformer)

#### Loading a pre-trained model checkpoint

In [None]:
# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU machine can also be loaded on a CPU-only one.
transformer.load_state_dict(torch.load('Epoch-2 checkpoint.pth', map_location=DEVICE))

### Functions for training and evaluating the model

In [None]:
from torch.utils.data import DataLoader

def train_epoch(model, optimizer,train_loader):
    """Train ``model`` for one pass over ``train_loader``.

    Each batch is ``(images, batch-first label ids, file names)``. The decoder
    input is the label without its last position and the training target is
    the label without its first position (teacher forcing).

    Returns:
        The mean of the per-batch losses.
    """
    model.train()
    losses = 0

    for src, tgt, _ in train_loader:
        src = src.to(DEVICE)
        tgt = tgt.to(DEVICE)

        tgt_input = tgt[:, :-1]
        # The model returns sequence-first logits, so put the targets in the
        # same layout before flattening both.
        tgt_out = torch.transpose(tgt[:, 1:],0,1)

        optimizer.zero_grad()

        # The model builds its own attention masks internally; no mask needs
        # to be computed here.
        logits = model(src, tgt_input)

        loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
        loss.backward()

        optimizer.step()
        losses += loss.item()

    return losses / len(train_loader)


def evaluate(model,val_loader):
    """Compute the mean loss and token accuracy of ``model`` on ``val_loader``.

    Runs under ``torch.no_grad()`` so no autograd graph is kept. Accuracy is
    measured with teacher forcing and counts only positions whose *target*
    token is not ``<pad>``, which matches the ``ignore_index`` of the loss.
    (Masking on the decoder input instead would count the ``<pad>`` target
    that follows ``<eos>`` in every padded sequence as an error.)

    Returns:
        ``(mean per-batch loss, accuracy in percent)``; the accuracy is a
        0-dim tensor.
    """
    model.eval()
    losses = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for src, tgt, _ in val_loader:
            src = src.to(DEVICE)
            tgt = tgt.to(DEVICE)

            tgt_input = tgt[:, :-1]
            logits = model(src, tgt_input)

            # Sequence-first targets, aligned with the sequence-first logits.
            tgt_out = torch.transpose(tgt[:, 1:],0,1)

            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
            losses += loss.item()

            preds = torch.argmax(logits, dim=-1)
            mask = tgt_out != PAD_IDX
            correct += ((preds == tgt_out) & mask).sum()
            total += mask.sum()

    Accuracy = (correct/total)*100

    return losses / len(val_loader) , Accuracy

In [None]:
import gc
import time

In [None]:
# Force a garbage-collection pass, then pause for three seconds before the
# training cell. NOTE(review): presumably meant to free memory before
# training starts; confirm whether the sleep is still needed.
gc.collect()
time.sleep(3)

### Training Code

In [None]:
from timeit import default_timer as timer
NUM_EPOCHS = 10
for epoch in range(1, NUM_EPOCHS+1):
    # Only the training pass is timed; evaluation is excluded.
    start_time = timer()
    train_loss = train_epoch(transformer, optimizer,train_loader)
    end_time = timer()
#     gc.collect()
#     time.sleep(3)
#     torch.cuda.empty_cache()
    val_loss, Accuracy = evaluate(transformer,test_loader)
    torch.save(transformer.state_dict(), 'Epoch-{} checkpoint.pth'.format(epoch))

    # Build the report once so the console and the log file cannot drift apart.
    report = (f"Epoch: {epoch}, Train loss: {train_loss:.3f}, Val loss: {val_loss:.3f}, "f"Epoch time = {(end_time - start_time):.3f}s"f" Accuracy = {Accuracy:.3f}%")
    print(report)
    # Append mode keeps the history of earlier epochs and runs; `with` closes
    # the file even if the write fails.
    with open("Performance.txt",'a') as fw:
        fw.write("\n" + report)

### Function for using Beam Search for decoding

In [None]:
def process_beam(images, model, max_length=150, device=DEVICE, flip=False,k=5):
    """Decode a batch of images with beam search and return one string per image.

    Args:
        images: input passed unchanged to ``model.encode``.
        model: object providing ``encode``, ``decode`` and ``generator``.
        max_length: fixed length of every hypothesis buffer; also the number
            of decoding positions.
        device: device on which the beam buffers are allocated.
        flip: not referenced anywhere in the body.
        k: beam width.

    Returns:
        A list of strings, one per batch element, each cut at the first
        ``<eos>``.

    Outline: the first step seeds the ``k`` beams with the ``k`` best tokens
    after ``<bos>``. Every later step re-runs the decoder on each beam's full
    padded sequence, forms ``k*k`` candidate scores by adding each beam's
    running score to the log-probabilities of its ``k`` best next tokens, and
    keeps the ``k`` largest.
    """
    with torch.no_grad():
        inp = images
        
        # Buffers for hypotheses that proposed <eos>. They are allocated with
        # torch.empty, i.e. uninitialised until written in the loop below.
        # NOTE(review): predicted_indices / predicted_probs are written but
        # never read again in this function; only prediction_scores is used.
        predicted_indices = torch.empty((k,inp.shape[0],max_length)).type(torch.LongTensor).to(device)
        predicted_probs = torch.empty((k,inp.shape[0],max_length)).type(torch.FloatTensor).to(device)
        prediction_scores = torch.zeros((k,inp.shape[0])).type(torch.FloatTensor).to(device)

        features = model.encode(inp)
        
        # Start sequences: <bos> followed by padding, one row per batch element.
        indices = torch.LongTensor([BOS_IDX] + [PAD_IDX] * (max_length - 1)).repeat(inp.shape[0],1).to(device) # (B, l_max)
#         print(indices.shape)
        tgt_mask = (generate_square_subsequent_mask(indices.size(1)).type(torch.bool)).to(device)
        
#         print(indices.shape)
#         print(features.shape)
#         print(tgt_mask.shape)
        scores = model.decode(indices, features, tgt_mask)
        
        # Swap the first two axes of the decoder output before projecting.
        scores = torch.transpose(scores,0,1)
        
        scores = model.generator(scores)
        parent_node_indices, parent_node_probs = top_k(scores,k) # (k, B, l_max), (k, B, l_max) 
        
        indices = torch.empty((k, inp.shape[0], max_length)).type(torch.LongTensor).to(device)
        indices_probs = torch.zeros(k, inp.shape[0], max_length).type(torch.FloatTensor).to(device)
        
        # Seed beam j with the j-th entry of the top-k result at position 0.
        indices[:,:,:] = PAD_IDX #[PAD_TOKEN]
        indices[:,:,0] = BOS_IDX #[SOS_TOKEN]
        indices[:,:,1] = parent_node_indices[:,:,0]
        indices_probs[:,:,0] = 1
        indices_probs[:,:,1] = parent_node_probs[:,:,0]
        
        # cache_*[j] holds the top-k continuations of beam j at every position.
        cache_indices = torch.empty((k, k, inp.shape[0], max_length)).type(torch.LongTensor).to(device)
        cache_probs = torch.empty((k, k, inp.shape[0], max_length)).type(torch.FloatTensor).to(device)
        
        for i in range(2, max_length):
            
            tgt_mask = (generate_square_subsequent_mask(indices.size(-1)).type(torch.bool)).to(device)
#             print(indices.shape)
#             print(tgt_mask.shape)
            
            # One decoder pass per beam over the whole padded sequence.
            for j in range(k):
                temp = model.decode(indices[j,:,:].reshape(inp.shape[0],max_length), features, tgt_mask)
                
                temp = torch.transpose(temp,0,1)
                
                temp = model.generator(temp)
                cache_indices[j], cache_probs[j] = top_k(temp,k)
            
            token_probs = indices_probs[:,:,i-1] # (k,B)
            nx_token_probs = cache_probs[:,:,:,i-1] #(k,k,B)
            
            # Candidate score = running score of parent j + log-prob of its l-th continuation.
            # NOTE(review): `probs` is created without a device argument, unlike
            # the other buffers; confirm this is intended.
            probs = torch.empty((k,k,inp.shape[0]))
            for j in range(k):
                for l in range(k):
                    probs[j,l,:] = token_probs[j,:] + nx_token_probs[j,l,:]

            probs = probs.reshape(k*k, inp.shape[0]) # (k^2,B)

            # Ascending sort: the last k rows are the k best candidates.
            best_preds = torch.sort(probs,axis=0)
            current_probs = best_preds.values[-k:,:]
            index_locs = best_preds.indices[-k:,:]
            
            # Flat candidate index j*k + l -> parent beam j and continuation rank l.
            p_nodes = index_locs // k
            d_nodes = index_locs % k
            
            # Work on copies so every new beam is built from the beams of the previous step.
            indices_temp = indices.detach().clone()
            indices_probs_temp = indices_probs.detach().clone()

            for y in range(inp.shape[0]):
                for j in range(k):
                    
                    # NOTE(review): `.any() != EOS_IDX` compares a boolean
                    # reduction with EOS_IDX rather than testing whether the
                    # prefix already contains <eos>; verify the intent.
                    if cache_indices[p_nodes[j,y],d_nodes[j,y],y,i-1] == EOS_IDX and indices[p_nodes[j,y],y,:i].any() != EOS_IDX:
                        # The stored score is the candidate score divided by i**0.7.
                        if (current_probs[j,y].item()/i**0.7 > prediction_scores[j,y].item()) or prediction_scores[j,y]==0:
                            predicted_indices[j,y,:] = indices[p_nodes[j,y],y,:]
                            predicted_probs[j,y,:] = indices_probs[p_nodes[j,y],y,:]
                            predicted_probs[j,y,i] = current_probs[j,y]
                            prediction_scores[j,y] = (1/(i**0.7))*current_probs[j,y]
                        
                    # New beam j = parent's prefix plus the chosen continuation at position i.
                    indices_temp[j,y,:] = indices[p_nodes[j,y],y,:]   
                    indices_probs_temp[j,y,:] = indices_probs[p_nodes[j,y],y,:]

                    indices_temp[j,y,i] = cache_indices[p_nodes[j,y],d_nodes[j,y],y,i-1]
                    indices_probs_temp[j,y,i] = current_probs[j,y]
                    
                indices[:,y,:] = indices_temp[:,y,:]
                indices_probs[:,y,:] = indices_probs_temp[:,y,:]

    # Pick, per batch element, the beam slot with the largest stored score.
    # NOTE(review): the sequence is read from the running `indices` buffer,
    # not from `predicted_indices`; confirm which one is intended.
    probabilities = prediction_scores
    pick = torch.sort(probabilities, axis=0)

    for y in range(inp.shape[0]):
        indices[0,y,:] = indices[pick.indices[-1,y],y,:]

    indices = indices[0,:,:]
    indices = indices.cpu().numpy()
    
    # Ids -> text: <bos> is removed and the string is cut at the first <eos>,
    # which is temporarily represented by '*'.
    lines = []
    for idx in indices:
        line=""
        pred = "".join(vocab_transform.lookup_tokens(list(idx))).replace("<bos>", "").replace("<eos>", "*")
        for char in pred:
            if char == '*':
                break
            else:
                line +=char
            
#         print(line)
        lines.append(line)
    return lines

def top_k(scores,k):
    """Return the ``k`` most likely tokens and their log-probabilities per position.

    Args:
        scores: 3-D logits with the vocabulary on the last axis.
        k: number of candidates to keep.

    Returns:
        ``(indices, log_probs)``, both with the candidate axis moved to the
        front, i.e. ``(k, scores.shape[0], scores.shape[1])``. Candidates are
        in ascending order of probability, so the last entry is the best one.

    ``log_softmax`` is used instead of ``log(softmax(x))``: the latter
    underflows to ``log(0) = -inf`` for very unlikely tokens.
    """
    log_probs = torch.log_softmax(scores, dim=-1)
    ordered = torch.sort(log_probs, dim=-1)
    # Ascending sort: the last k entries along the vocabulary axis are the top k.
    top_k_indices = ordered.indices[:, :, -k:].permute(2, 0, 1)
    top_k_probs = ordered.values[:, :, -k:].permute(2, 0, 1)
    return top_k_indices, top_k_probs

In [None]:
import pandas as pd
from tqdm import tqdm

### Code for Evaluating using Beam Search

In [None]:
# The model only needs to be moved and switched to eval mode once.
transformer.to(DEVICE)
transformer.eval()

records = []
# The dataset yields (image, label ids, file name); the file name is unused here.
for x, y, _ in tqdm(test_loader):
    x = torch.unsqueeze(x,1).to(DEVICE)
    lines_x = process_beam(x, transformer, device=DEVICE)

    line_y = []
    for line in y:
        # .tolist() hands plain Python ints to the vocabulary lookup.
        temp = "".join(vocab_transform.lookup_tokens(line.tolist())).replace("<bos>", "").replace("<eos>", "").replace("<pad>", "")
        line_y.append(temp)

    for pred, label in zip(lines_x, line_y):
        records.append({'preds':pred, 'labels':label})

    # Rebuild the frame from the collected rows (DataFrame.append no longer
    # exists in pandas 2.x) and rewrite the CSV after every batch so that
    # partial results survive an interrupted run.
    df = pd.DataFrame(records, columns=['preds', 'labels'])
    df.to_csv('preds_final.csv')
    

### Function for using Greedy Decode

In [None]:

def greedy_decode(model, src, src_mask, max_len, start_symbol):
    """Greedily decode every image in ``src`` and return the predicted strings.

    Args:
        model: object providing ``encode``, ``decode`` and ``generator``.
        src: image batch in the layout expected by ``model.encode``.
        src_mask: unused; kept so existing callers keep working.
        max_len: at most ``max_len - 1`` tokens are generated per image.
        start_symbol: id of the first decoder input token.

    Returns:
        A list with one string per batch element, with ``<bos>``/``<eos>``
        removed.

    Decoding stops for an image as soon as ``<eos>`` is produced. (The
    previous nested loops reused the same loop variable, so the ``break`` only
    left the inner loop and generation continued past ``<eos>``.)
    """
    src = src.to(DEVICE)
#     src_mask = src_mask.to(DEVICE)

    y = []
    with torch.no_grad():
        memory1 = model.encode(src)

        # The batch is axis 1 of the encoder memory; decode one sample at a time.
        for w in range(memory1.shape[1]):
            memory = torch.unsqueeze(memory1[:,w,:],1)
            ys = torch.full((1, 1), start_symbol, dtype=torch.long, device=DEVICE)
            yl = []
            for _ in range(max_len-1):
                tgt_mask = (generate_square_subsequent_mask(ys.size(1))
                                    .type(torch.bool)).to(DEVICE)
                out = model.decode(ys,memory,tgt_mask)
                out = out.transpose(0, 1)
                # Logits of the most recent position only.
                prob = model.generator(out[:, -1])
                next_word = torch.argmax(prob, dim=1).item()
                # Keep the running sequence as integer ids on the same device.
                next_token = torch.full((1, 1), next_word, dtype=ys.dtype, device=ys.device)
                ys = torch.cat([ys, next_token], dim=1)
                yl.append(next_word)
                if next_word == EOS_IDX:
                    break
            output = "".join(vocab_transform.lookup_tokens(yl)).replace("<bos>", "").replace("<eos>", "")
            y.append(output)
    return y
def translate(model: torch.nn.Module, src_sentence: str):
    """Encode ``src_sentence`` with ``text_transform`` and pass it to ``greedy_decode``.

    NOTE(review): this looks like a leftover from a text-to-text translation
    example. ``greedy_decode`` in this file returns a Python list of strings,
    so the ``.flatten()`` call below would fail, and ``model.encode`` here
    feeds its input to convolution layers rather than to a token embedding.
    Confirm whether this function is still needed before relying on it.
    """
    model.eval()
    # Token ids reshaped to a single column.
    src = text_transform(src_sentence).view(-1, 1)
    num_tokens = src.shape[0]
    src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool)
    tgt_tokens = greedy_decode(
        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=BOS_IDX).flatten()
    return " ".join(vocab_transform.lookup_tokens(list(tgt_tokens.cpu().numpy()))).replace("<bos>", "").replace("<eos>", "")

### Evaluating using Greedy Decode

In [None]:
# The model only needs to be moved and switched to eval mode once.
transformer.to(DEVICE)
transformer.eval()

records = []
for x1, y, add in tqdm(test_loader):
    x1 = torch.unsqueeze(x1,1).to(DEVICE)
    lines_x = greedy_decode(transformer,x1,src_mask = None,max_len=150,start_symbol=BOS_IDX)

    line_y = []
    for line in y:
        # .tolist() hands plain Python ints to the vocabulary lookup.
        temp = "".join(vocab_transform.lookup_tokens(line.tolist())).replace("<bos>", "").replace("<eos>", "").replace("<pad>", "")
        line_y.append(temp)

    # `img_name` (not `address`) so the module-level dataset root path that
    # MyDataset was built with is not overwritten by a file name.
    for pred, label, img_name in zip(lines_x, line_y, add):
        records.append({'Address': img_name,'preds':pred, 'labels':label})

    # Rebuild the frame from the collected rows (DataFrame.append no longer
    # exists in pandas 2.x) and rewrite the CSV after every batch so that
    # partial results survive an interrupted run.
    df = pd.DataFrame(records, columns=['Address', 'preds', 'labels'])
    df.to_csv('prac_preds_final.csv')