**Note:** Initial development was done on a Persian-English dataset, so some variable names are misleading (for example, `farsi` holds whichever source language is selected). As long as you set the language and mode configuration variables correctly, you should be able to train on any language pair you wish. To reuse this code for a different language pair, add the language to the list of source languages and provide the correct file paths. The code is currently configured to run on the Ro-En data used in our final experiments.



In [None]:
# Mount Google Drive at /content/drive; the later cells read and write their
# data, checkpoint and progress files under drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import torchtext
torchtext.__version__
!pip install torchtext==0.6.0
!pip install xlsxwriter
import xlsxwriter
!pip install openpyxl
from openpyxl import load_workbook

In [None]:
import sys
import os
from os import path

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torch.utils.tensorboard import SummaryWriter



from torchtext.datasets import Multi30k
from torchtext import data, datasets
from torchtext.data import Field, BucketIterator
from torchtext.data.metrics import bleu_score

import numpy as np
import spacy
import random
import time
from datetime import datetime
import ast
import nltk
from nltk.translate.bleu_score import SmoothingFunction

In [None]:
source_languages = ['fa', 'ro']
set_language_idx = 1 #set language index from above list
# Derive the language code from the index so the two settings can never
# disagree (later cells branch on both set_language and set_language_idx).
set_language = source_languages[set_language_idx] #Set source language
set_mode = '15'  #Only required if set_language is ro. choose between '15' or 'all'

In [None]:
# Resolve every corpus, split, and checkpoint location for the selected
# source language (and, for Romanian, the selected data mode).
if set_language == 'fa':
    en_file_path, fa_file_path = (
        'drive/MyDrive/en-fa/mizan_en.txt',
        'drive/MyDrive/en-fa/mizan_fa.txt',
    )
    data_size = 500 #set to 'full' to load all
    train_path, validation_path, test_path = (
        'drive/MyDrive/en-fa/train_mizan_',
        'drive/MyDrive/en-fa/val_mizan_',
        'drive/MyDrive/en-fa/test_mizan_',
    )
    base_path = 'drive/MyDrive/en-fa/'
    weights_path = 'drive/MyDrive/en-fa/weights/'

elif set_language == 'ro':
    # mode -> (en corpus, ro corpus, train prefix, validation prefix, test prefix)
    _ro_paths_by_mode = {
        '15': (
            'drive/MyDrive/en-ro/testing_nevoie_15_en.txt',
            'drive/MyDrive/en-ro/testing_nevoie_15_ro.txt',
            'drive/MyDrive/en-ro/training_nevoie_15_',
            'drive/MyDrive/en-ro/validating_nevoie_15_',
            'drive/MyDrive/en-ro/testing_nevoie_15_',
        ),
        'all': (
            'drive/MyDrive/en-ro/testing_nevoie_all_en.txt',
            'drive/MyDrive/en-ro/testing_nevoie_all_ro.txt',
            'drive/MyDrive/en-ro/training_nevoie_all_',
            'drive/MyDrive/en-ro/validating_nevoie_all_',
            'drive/MyDrive/en-ro/testing_nevoie_all_',
        ),
    }
    # An unrecognised mode leaves the mode-specific paths undefined, as before.
    if set_mode in _ro_paths_by_mode:
        (en_file_path, ro_file_path,
         train_path, validation_path, test_path) = _ro_paths_by_mode[set_mode]
    data_size = 500 #set to 'full' to load all
    base_path = 'drive/MyDrive/en-ro/'
    weights_path = 'drive/MyDrive/en-ro/weights/'

In [None]:
# Create the progress workbook with its header row, but only when it does not
# exist yet so earlier runs' rows are preserved.
progress_file = base_path+'s2s_attention_progress.xlsx'
if not path.exists(progress_file):
    workbook = xlsxwriter.Workbook(progress_file)
    worksheet = workbook.add_worksheet()
    header_titles = ('Run_ID', 'Epoch', 'Bleu', 'Mean_loss', 'Duration', 'Sample')
    # Row 0 is the sheet's first row, i.e. cells A1..F1.
    for column, title in enumerate(header_titles):
        worksheet.write(0, column, title)
    workbook.close()

In [None]:
#train/val/test splitter (NOT REQUIRED FOR RO)
#set file paths as required
if set_language_idx == 0:

    # Load both sides of the corpus. The same line indices are used for both
    # files below, so the files are assumed to be line-aligned.
    with open(en_file_path, encoding="utf8") as f:
        en_sentences = list(f)
    with open(fa_file_path, encoding="utf8") as f:
        fa_sentences = list(f)

    mode = ['train', 'val', 'test']
    lang = ['en','fa']
    data_split = [35000, 1000, 2000]                 #set train, val, test sizes
    train_end = data_split[0]
    val_end = train_end + data_split[1]

    # One shared random draw, partitioned into the three splits, so both
    # languages receive exactly the same sentence pairs in the same order.
    sample_indices = random.sample(range(len(en_sentences)), sum(data_split))
    indices_by_task = {
        'train': sample_indices[:train_end],
        'val': sample_indices[train_end:val_end],
        'test': sample_indices[val_end:sum(data_split)],
    }
    sentences_by_language = {'en': en_sentences, 'fa': fa_sentences}

    for language in lang:
        try:
            os.remove(train_path + language + '.txt')
            os.remove(test_path + language + '.txt')
            os.remove(validation_path + language + '.txt')
            print(language + ' File delete successful')
        except OSError:
            print('no deletion')

        sentences = sentences_by_language[language]

        for task in mode:
            # `with` guarantees the file is flushed and closed even if a write fails.
            with open(base_path + task+'_mizan_'+language + '.txt', 'w', encoding="utf8") as outF:
                for i in indices_by_task[task]:
                    line = sentences[i]
                    # The corpus' last line may lack a trailing newline; add it
                    # so it cannot fuse with the next sentence in the output.
                    outF.write(line if line.endswith('\n') else line + '\n')

else:
    print('not reuired for selected source language')

In [None]:
#RO VAL SET CREATION _ ONLY NEED TO RUN FIRST TIME
# Builds the Romanian-English validation files from the first `val_size`
# lines of the corpus files. Guarded so that it is skipped for any other
# source language (ro_file_path only exists in the 'ro' configuration).
if set_language == 'ro':
    # Remove stale validation files one by one so that a missing file does not
    # prevent the other from being deleted. (The original message referenced
    # `language` before it was defined, raising NameError on a fresh kernel.)
    for language in ['ro', 'en']:
        try:
            os.remove(validation_path + language + '.txt')
            print(language + ' File delete successful')
        except OSError:
            print('no deletion')

    with open(en_file_path, encoding="utf8") as f:
        en_sentences = list(f)
    with open(ro_file_path, encoding="utf8") as f:
        ro_sentences = list(f)

    val_size = 50 #set size of val file
    en_val = en_sentences[:val_size]
    ro_val = ro_sentences[:val_size]

    for language, write_sentences in (('en', en_val), ('ro', ro_val)):
        # validation_path is the configured 'validating_nevoie_<mode>_' prefix,
        # so the files land exactly where the dataset loader looks for them.
        with open(validation_path + language + '.txt', 'w', encoding="utf8") as outF:
            outF.writelines(write_sentences)
else:
    print('not required for selected source language')

In [None]:
# Sanity check: print the number of lines in the English side of the
# train, validation and test splits (in that order).
train_len_checker, val_len_checker, test_len_checker = [], [], []
split_checks = (
    (train_len_checker, train_path),
    (val_len_checker, validation_path),
    (test_len_checker, test_path),
)
for collected_lines, split_prefix in split_checks:
    with open(split_prefix+'en.txt', encoding="utf8") as f:
        collected_lines.extend(str(line) for line in f)
    print(len(collected_lines))

In [None]:
#generate random test case
def _load_sentences(file_path, limit):
    """Return the first `limit` lines of `file_path`, or all lines when limit == 'full'."""
    loaded = []
    with open(file_path, encoding="utf8") as f:
        for i, line in enumerate(f):
            if limit != 'full' and i == limit:
                break
            loaded.append(str(line))
    return loaded


en_sentences = [sentence.lower() for sentence in _load_sentences(en_file_path, data_size)]

# Pick the source-side corpus that matches the configured language instead of
# a hard-coded path (ro_file_path does not exist in the 'fa' configuration).
source_file_path = fa_file_path if set_language == 'fa' else ro_file_path
# Historical name: holds the source-language sentences, whichever language is selected.
fa_sentences = _load_sentences(source_file_path, data_size)

# Draw one sample sentence, skipping the first 50 lines
# (presumably because those lines form the validation split; confirm against val_size).
# Requires more than 50 loaded sentences, otherwise random.sample raises ValueError.
random_index = random.sample(range(50, len(fa_sentences)), 1)[0]
# Strip the punctuation and the newline in a single pass.
random_src_sentence = fa_sentences[random_index].translate(str.maketrans('', '', '.:,;!\n'))
print(random_src_sentence)

In [None]:
#tokenizer
# Commas become a space (so "a,b" splits into two words); the other listed
# characters, including the newline, are deleted outright.
_TOKENIZER_TABLE = str.maketrans({
    ',': ' ',
    '.': None, ':': None, ';': None, '!': None, "'": None, '?': None,
    '\n': None,
})


def tokenizer(sentence):
    """Lower-case `sentence`, strip punctuation and split it into word tokens.

    Splitting is done on runs of whitespace, so repeated, leading or trailing
    spaces (e.g. the gap left behind by a removed " ." or " ? ") never produce
    empty-string tokens.

    Args:
        sentence: raw sentence text, optionally ending in a newline.

    Returns:
        List of non-empty tokens; an empty list for blank input.

    Note:
        Because empty tokens are no longer emitted, a vocabulary built with
        this tokenizer can differ from one built with the previous version.
    """
    return sentence.lower().translate(_TOKENIZER_TABLE).split()

In [None]:
#build vocab
# `farsi` is the source-language field (whichever language is selected) and
# `english` the target-language field; both wrap sentences in <sos>/<eos>.
farsi = Field(tokenize=tokenizer, init_token='<sos>', eos_token='<eos>')
english = Field(tokenize=tokenizer, init_token='<sos>', eos_token='<eos>')
source_exts = ['fa.txt', 'ro.txt']

# Every split shares the same file extensions and fields; only the path prefix differs.
split_exts = (source_exts[set_language_idx], 'en.txt')
split_fields = (farsi, english)
train_data, validation_data, test_data = (
    datasets.TranslationDataset(path=split_prefix, exts=split_exts, fields=split_fields)
    for split_prefix in (train_path, validation_path, test_path)
)
# Drop the first 50 test examples.
test_data = test_data[50:]

# Vocabularies come from the training split only; words seen fewer than
# twice are left out.
for field in (farsi, english):
    field.build_vocab(train_data, max_size = 100000, min_freq = 2)
vars(train_data[5]).values()

In [None]:
# Show how many examples test_data currently holds.
len(test_data)

In [None]:
#test and evaluation

# BLEU implementations that bleu() can use; bleu_idx selects one by its
# position in this list (0 -> torchtext, 1 -> nltk).
bleu_libs = ['torchtext', 'nltk']
bleu_idx = 1
bleu_version = bleu_libs[bleu_idx]   #set bleu choice

def translate_sentence(model, sentence, farsi, english, device, max_length=50):
    """Greedily translate one source sentence.

    Args:
        model: object exposing ``encoder(src)`` returning
            ``(encoder_states, hidden, cell)`` and
            ``decoder(prev_token, encoder_states, hidden, cell)`` returning
            ``(scores, hidden, cell)``.
        sentence: raw text (tokenized with ``tokenizer``) or a sequence of
            already-tokenized words. The sequence is copied, never modified.
        farsi: source-language field (``init_token``, ``eos_token``, ``vocab.stoi``).
        english: target-language field (``vocab.stoi``, ``vocab.itos``).
        device: torch device the input tensors are moved to.
        max_length: maximum number of decoding steps.

    Returns:
        List of target tokens without the leading ``<sos>``. The list ends
        with ``<eos>`` when the model produced it within ``max_length`` steps.
    """
    if isinstance(sentence, str):
        tokens = tokenizer(sentence)
    else:
        # Copy: callers pass the token lists stored inside dataset examples,
        # and adding <sos>/<eos> in place would grow them on every call.
        tokens = list(sentence)

    # Add <SOS> and <EOS> in beginning and end respectively
    tokens = [farsi.init_token, *tokens, farsi.eos_token]

    # Go through each source token and convert to an index
    text_to_indices = [farsi.vocab.stoi[token] for token in tokens]

    # Convert to a (src_length, 1) tensor, i.e. a batch of one sentence
    sentence_tensor = torch.LongTensor(text_to_indices).unsqueeze(1).to(device)

    eos_idx = english.vocab.stoi["<eos>"]
    outputs = [english.vocab.stoi["<sos>"]]

    with torch.no_grad():
        outputs_encoder, hiddens, cells = model.encoder(sentence_tensor)

        for _ in range(max_length):
            previous_word = torch.LongTensor([outputs[-1]]).to(device)
            output, hiddens, cells = model.decoder(
                previous_word, outputs_encoder, hiddens, cells
            )
            best_guess = output.argmax(1).item()
            outputs.append(best_guess)

            # Model predicts it's the end of the sentence
            if best_guess == eos_idx:
                break

    # Skip the start token when converting indices back to words
    return [english.vocab.itos[idx] for idx in outputs[1:]]

def bleu(data, model, farsi, english, device):
    """Compute corpus-level BLEU of `model` on `data`.

    Every example is translated with ``translate_sentence`` and compared with
    its single reference translation. The backend is chosen by the module-level
    ``bleu_idx`` (0: torchtext ``bleu_score``, 1: NLTK ``corpus_bleu`` with
    ``method4`` smoothing).

    Args:
        data: iterable of examples carrying ``src`` and ``trg`` token lists.
        model: the seq2seq model; it is switched to eval mode for the duration
            of the call and restored to its previous mode afterwards.
        farsi: source-language field.
        english: target-language field.
        device: torch device used for translation.

    Returns:
        BLEU score as returned by the selected backend.

    Raises:
        ValueError: if ``bleu_idx`` does not name a supported backend.
    """
    targets = []
    outputs = []

    # Evaluate with dropout disabled, then put the model back the way it was.
    was_training = model.training
    model.eval()
    try:
        for example in data:
            src = vars(example)["src"]
            trg = vars(example)["trg"]

            prediction = translate_sentence(model, src, farsi, english, device)
            # Remove the <eos> token only when it is really there; a translation
            # cut off at max_length ends with a real word.
            if prediction and prediction[-1] == "<eos>":
                prediction = prediction[:-1]

            # Both backends expect, per hypothesis, a LIST of reference
            # sentences, hence the extra wrapping even with one reference.
            targets.append([trg])
            outputs.append(prediction)
    finally:
        model.train(was_training)

    if bleu_idx == 0:
        return bleu_score(outputs, targets)
    if bleu_idx == 1:
        smoothie = SmoothingFunction().method4
        return nltk.translate.bleu_score.corpus_bleu(targets, outputs, smoothing_function=smoothie)
    raise ValueError(f"Unsupported bleu_idx: {bleu_idx!r} (expected 0 or 1)")

In [None]:
def save_checkpoint(state, filename=weights_path+"s2s_a_checkpoint.pth.tar"):
    """Serialize `state` to `filename` with ``torch.save``.

    Args:
        state: object to save (the training loop passes a dict holding the
            model and optimizer state dicts).
        filename: destination path; defaults to the checkpoint file inside
            ``weights_path``. Missing parent directories are created.
    """
    print("=> Saving checkpoint")
    # Only path-like destinations have a directory to create.
    if isinstance(filename, (str, os.PathLike)):
        target_dir = os.path.dirname(filename)
        if target_dir:
            # First run: the weights folder may not exist yet.
            os.makedirs(target_dir, exist_ok=True)
    torch.save(state, filename)


def load_checkpoint(checkpoint, model, optimizer):
    """Restore `model` and `optimizer` from a checkpoint mapping.

    Args:
        checkpoint: mapping with the keys ``"state_dict"`` (model state) and
            ``"optimizer"`` (optimizer state).
        model: object whose ``load_state_dict`` receives the model state.
        optimizer: object whose ``load_state_dict`` receives the optimizer state.
    """
    print("=> Loading checkpoint")
    # Model first, optimizer second.
    for receiver, key in ((model, "state_dict"), (optimizer, "optimizer")):
        receiver.load_state_dict(checkpoint[key])





class EncoderAttnRNN(nn.Module):
    """Bidirectional LSTM encoder for the attention seq2seq model.

    Args:
        input_size: size of the source vocabulary.
        embedding_size: dimension of the token embeddings.
        hidden_size: hidden size of the LSTM (per direction).
        num_layers: number of stacked LSTM layers. ``forward`` merges only the
            forward/backward states at indices 0 and 1, so the returned
            hidden/cell always have a leading dimension of 1.
        p: dropout probability, applied to the embeddings and, when
            ``num_layers > 1``, between the LSTM layers.
    """

    def __init__(self, input_size, embedding_size, hidden_size, num_layers, p):
        super(EncoderAttnRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.dropout = nn.Dropout(p)
        # dropout wont work for rnn if we have only one layer.
        # Uses the constructor argument `p`, not a module-level global, so the
        # class works regardless of which notebook cells have been run.
        rnn_dropout = p if num_layers > 1 else 0.0

        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=rnn_dropout, bidirectional=True)

        # Project the concatenated forward+backward state (hidden_size * 2)
        # down to hidden_size, the size the unidirectional decoder expects.
        self.fc_hidden = nn.Linear(hidden_size * 2, hidden_size)
        self.fc_cell = nn.Linear(hidden_size * 2, hidden_size)

    def forward(self, x):
        """Encode a batch of source sentences.

        Args:
            x: token indices, shape (src_length, batch_size).

        Returns:
            Tuple ``(encoder_states, hidden, cell)``:
            encoder_states is (src_length, batch_size, hidden_size * 2);
            hidden and cell are (1, batch_size, hidden_size).
        """
        embedding = self.dropout(self.embedding(x))
        # embedding shape: (seq_length, batch_size, embedding_size)

        # ------------------------------------------------
        # -------- DIFFERENCE WITH A BASIC Seq2Seq -------
        encoder_states, (hidden, cell) = self.rnn(embedding)
        # encoder_states = [src_length, batch_size, hidden_size*num_directions]
        # hidden = [n_layers*num_directions, batch_size, hidden_size]

        # Concatenate the forward (index 0) and backward (index 1) states along
        # the feature dimension and project them through a linear layer, so they
        # can initialise the decoder, which is not bidirectional.
        # Index slicing ([idx:idx+1]) keeps the leading dimension.
        hidden = self.fc_hidden(torch.cat((hidden[0:1], hidden[1:2]), dim=2))
        cell = self.fc_cell(torch.cat((cell[0:1], cell[1:2]), dim=2))
        # The decoder attends over the hidden states of all time steps,
        # so encoder_states is returned as well.
        # ------------------------------------------------

        return encoder_states, hidden, cell



class DecoderAttnRNN(nn.Module):
    """LSTM decoder with additive attention over the encoder states.

    Args:
        input_size: size of the target vocabulary (embedding table rows).
        embedding_size: dimension of the token embeddings.
        hidden_size: hidden size of the decoder LSTM; the encoder states are
            expected to have ``hidden_size * 2`` features.
        output_size: number of output classes (target vocabulary size).
        num_layers: number of stacked LSTM layers. The attention step repeats
            ``hidden`` along the sequence dimension, which only lines up with
            the encoder states when ``hidden`` has a leading dimension of 1.
        p: dropout probability, applied to the embeddings and, when
            ``num_layers > 1``, between the LSTM layers.
    """

    def __init__(
        self, input_size, embedding_size, hidden_size, output_size, num_layers, p
    ):
        super(DecoderAttnRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.dropout = nn.Dropout(p)
        # dropout wont work for rnn if we have only one layer.
        # Uses the constructor argument `p`, not a module-level global.
        rnn_dropout = p if num_layers > 1 else 0.0

        self.embedding = nn.Embedding(input_size, embedding_size)

        # LSTM input = context vector (hidden_size*2, forward + backward
        # encoder features) concatenated with the token embedding.
        self.rnn = nn.LSTM(hidden_size * 2 + embedding_size, hidden_size, num_layers, dropout=rnn_dropout)

        # Scores one encoder state (hidden_size*2) against the previous decoder
        # hidden state (hidden_size), therefore hidden_size*3 inputs.
        self.energy = nn.Linear(hidden_size * 3, 1)
        self.fc = nn.Linear(hidden_size, output_size)
        # dim=0 normalises the attention weights over the source positions.
        self.softmax = nn.Softmax(dim=0)
        self.relu = nn.ReLU()

    def forward(self, x, encoder_states, hidden, cell):
        """Run one decoding step.

        Args:
            x: previous target token indices, shape (batch_size,).
            encoder_states: (src_length, batch_size, hidden_size * 2).
            hidden: previous decoder hidden state, (1, batch_size, hidden_size).
            cell: previous decoder cell state, same shape as `hidden`.

        Returns:
            Tuple ``(predictions, hidden, cell)`` where predictions holds the
            unnormalised vocabulary scores, shape (batch_size, output_size).
        """
        # A single word per sentence is fed in, so add a sequence dim of 1.
        x = x.unsqueeze(0)
        # x = [1, batch_size]

        embedding = self.dropout(self.embedding(x))
        # embedding = [1, batch_size, embedding_size]

        # ------------------------------------------------
        # -------- DIFFERENCE WITH A BASIC Seq2Seq -------
        sequence_length = encoder_states.shape[0]
        # Repeat the decoder hidden state so it can be paired with every
        # encoder state.
        h_reshaped = hidden.repeat(sequence_length, 1, 1)
        # h_reshaped: (seq_length, N, hidden_size)

        # Concatenate along dim=2: hidden_size + hidden_size*2 = hidden_size*3
        energy = self.relu(self.energy(torch.cat((h_reshaped, encoder_states), dim=2)))
        attention = self.softmax(energy)
        # attention = [sequence_length, batch_size, 1], normalised over sequence_length

        # N is batch_size
        # attention: (seq_length, N, 1), snk
        # encoder_states: (seq_length, N, hidden_size*2), snl
        # context_vector: (1, N, hidden_size*2), knl
        # i.e. the attention-weighted sum of the encoder states.
        context_vector = torch.einsum("snk,snl->knl", attention, encoder_states)

        rnn_input = torch.cat((context_vector, embedding), dim=2)
        # rnn_input: (1, N, hidden_size*2 + embedding_size)
        # ------------------------------------------------

        outputs, (hidden, cell) = self.rnn(rnn_input, (hidden, cell))
        # outputs shape = [1, batch_size, hidden_size]

        predictions = self.fc(outputs)
        # predictions = [1, batch_size, length_of_vocab]
        # The loss function wants (batch_size, length_target_vocab),
        # so drop the leading sequence dimension.
        predictions = predictions.squeeze(0)
        # predictions = [batch_size, length_of_vocab]

        return predictions, hidden, cell


In [None]:
class Seq2SeqAttn(nn.Module):
    """Encoder/decoder wrapper that decodes a target sequence step by step.

    Args:
        encoder: callable returning ``(encoder_states, hidden, cell)`` for a
            source batch.
        decoder: callable taking ``(x, encoder_states, hidden, cell)`` and
            returning ``(scores, hidden, cell)``; must expose its output layer
            as ``fc`` (its ``out_features`` is the target vocabulary size).
    """

    def __init__(self, encoder, decoder):
        super(Seq2SeqAttn, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teacher_force_ratio=0.5):
        """Decode `target` given `source`, with optional teacher forcing.

        Args:
            source: source token indices, shape (src_length, batch_size).
            target: target token indices, shape (target_len, batch_size);
                row 0 holds the start tokens.
            teacher_force_ratio: probability, per time step, of feeding the
                ground-truth token instead of the model's own prediction.

        Returns:
            Tensor of shape (target_len, batch_size, target_vocab_size) with
            the decoder scores; row 0 is left at zero.
        """
        batch_size = source.shape[1]
        target_len = target.shape[0]
        # Read the vocabulary size and device from the model and its input
        # rather than from notebook globals, so the module is self-contained.
        target_vocab_size = self.decoder.fc.out_features

        # Buffer for the scores of every time step
        outputs = torch.zeros(
            target_len, batch_size, target_vocab_size, device=source.device
        )

        # ------------------------------------------------
        # -------- DIFFERENCE WITH A BASIC Seq2Seq -------
        encoder_states, hidden, cell = self.encoder(source)
        # ------------------------------------------------

        # First input will be <SOS> token
        x = target[0]

        for t in range(1, target_len):
            # At every time step the decoder also receives all encoder states
            # (for attention) and updates hidden and cell.
            output, hidden, cell = self.decoder(x, encoder_states, hidden, cell)

            # Store the prediction for the current time step
            outputs[t] = output

            # Get the best word the Decoder predicted (index in the vocabulary)
            best_guess = output.argmax(1)

            # With probability teacher_force_ratio feed the actual next word,
            # otherwise feed the word the decoder predicted. Mixing both lets
            # the model see inputs similar to what it gets at test time.
            x = target[t] if random.random() < teacher_force_ratio else best_guess

        return outputs


In [None]:
# Training
### We're ready to define everything we need for training our Seq2Seq model ###
now = datetime.now()
dt_string = now.strftime("%d/%m/%Y %H:%M:%S")
# Identifies a fresh run in the progress workbook and records the sample
# sentence that is translated at the start of every epoch.
run_id_dict = {'start_time': dt_string, 'source sentence': random_src_sentence}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

load_model = True    # resume from the progress workbook / checkpoint when available
model_loaded = False
save_model = True

# Training hyperparameters
num_epochs = 100
learning_rate = 3e-4
batch_size = 32

# Model hyperparameters
input_size_encoder = len(farsi.vocab)
input_size_decoder = len(english.vocab)
output_size = len(english.vocab)
encoder_embedding_size = 300
decoder_embedding_size = 300
hidden_size = 1024
num_layers = 1
# enc_dropout = 0.0
# dec_dropout = 0.0
dropout_p = 0.5
rnn_type = 'lstm'

# Tensorboard to get nice loss plot
writer = SummaryWriter("runs/loss_plot")
step = 0

# Built once (the original cell constructed the same iterators twice).
# sort_key groups sentences of similar source length into the same batch to
# minimise padding and save compute.
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, validation_data, test_data),
    batch_size=batch_size,
    sort_within_batch=True,
    sort_key=lambda x: len(x.src),
    device=device
)

encoder_net = EncoderAttnRNN(input_size_encoder, encoder_embedding_size, hidden_size, num_layers, dropout_p).to(device)
decoder_net = DecoderAttnRNN(
    input_size_decoder,
    decoder_embedding_size,
    hidden_size,
    output_size,
    num_layers,
    dropout_p,
).to(device)

model = Seq2SeqAttn(encoder_net, decoder_net).to(device)

optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Padding positions do not contribute to the loss.
pad_idx = english.vocab.stoi["<pad>"]
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

wb = load_workbook(filename = base_path+'s2s_attention_progress.xlsx')
ws = wb.active

# Defaults for a fresh run; overridden below when resuming.
start_epoch = 0
sentence = random_src_sentence

if load_model:
    # Row 1 is the header, every further row is one logged epoch.
    logged_epochs = len(ws["A"]) - 1

    if logged_epochs > 0:
        start_epoch = logged_epochs
        run_id_retrieved = ws.cell(row=len(ws["A"]), column = 1).value
        run_id_retrieved = run_id_retrieved.replace("'start_time'", '"start_time"').replace("'source sentence'", '"source sentence"')
        run_id_parsed = ast.literal_eval(run_id_retrieved)
        # Keep translating the same sample sentence as the run being resumed.
        sentence = run_id_parsed['source sentence'].strip()
    else:
        # Nothing logged yet: the last row is the header, which is not a
        # parseable run id. Start a fresh run and log it under the new run id.
        run_id_retrieved = str(run_id_dict)

    checkpoint_file = weights_path+'s2s_a_checkpoint.pth.tar'
    if path.exists(checkpoint_file):
        print('weight path exists')
        # map_location lets a checkpoint saved on GPU be restored on a
        # CPU-only runtime (and vice versa).
        load_checkpoint(torch.load(checkpoint_file, map_location=device), model, optimizer)
        print('weight load check')
        model_loaded = True




# One iteration per epoch: translate the sample sentence, train on every batch,
# score BLEU on the test data, append a row to the progress workbook and save
# a checkpoint.
for epoch in range(start_epoch, num_epochs):

    start_time = time.time()

    print(f"[Epoch {epoch} / {num_epochs}]")

    # Sample translation with dropout disabled
    model.eval()

    translated_sentence = translate_sentence(
        model, sentence, farsi, english, device, max_length=50
    )

    print(f"Translated example sentence: \n {translated_sentence}")

    model.train()

    losses = []

    for batch in train_iterator:
        # Get input and targets and get to cuda
        inp_data = batch.src.to(device)
        target = batch.trg.to(device)

        # Forward prop
        output = model(inp_data, target)

        # Output is of shape (trg_len, batch_size, output_dim) but Cross Entropy
        # Loss expects (N, classes) scores and (N) targets, so flatten the time
        # and batch dimensions together. The first time step (start token) is
        # dropped from both while we're at it.
        output = output[1:].reshape(-1, output.shape[2])
        target = target[1:].reshape(-1)

        optimizer.zero_grad()
        loss = criterion(output, target)

        losses.append(loss.item())

        # Back prop
        loss.backward()

        # Clip to avoid exploding gradient issues, makes sure grads are
        # within a healthy range
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        # Gradient descent step
        optimizer.step()

        # Plot to tensorboard
        writer.add_scalar("Training loss", loss.item(), global_step=step)
        step += 1

    mean_loss = sum(losses) / len(losses)

    # Score with dropout disabled; the next epoch switches back to train mode
    # before its training batches.
    model.eval()
    score = bleu(test_data, model, farsi, english, device)
    print(f"Bleu score {score * 100:.2f}")

    elapsed_minutes = (time.time() - start_time)/60
    print("--- %s minutes ---" % elapsed_minutes)

    write_row_idx = len(ws["A"])+1
    if not load_model:
        ws.cell(row=write_row_idx,column=1).value = str(run_id_dict)
    else:
        ws.cell(row=write_row_idx,column=1).value = run_id_retrieved
    ws.cell(row=write_row_idx,column=2).value = epoch
    ws.cell(row=write_row_idx,column=3).value = round(score * 100, 2) #Bleu
    ws.cell(row=write_row_idx,column=4).value = mean_loss #Mean Loss
    ws.cell(row=write_row_idx,column=5).value = elapsed_minutes #Duration
    ws.cell(row=write_row_idx,column=6).value = str(translated_sentence) #sample translation
    wb.save(base_path+'s2s_attention_progress.xlsx')

    # Save AFTER the epoch has trained and been logged, so the checkpoint always
    # matches the number of rows in the workbook (which is what a resumed run
    # uses as its starting epoch) and the final epoch is not lost.
    if save_model:
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)