In [None]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
import time


import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np
import os
import math
import gc

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# One fixed seed for every random number generator this notebook touches.
SEED = 1234

random.seed(SEED)        # Python's `random` (used for teacher forcing in Seq2Seq.forward)
np.random.seed(SEED)     # NumPy global generator
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
# Ask cuDNN to pick deterministic algorithms.
torch.backends.cudnn.deterministic = True

In [None]:
def unicodeToAscii(s):
    """Return `s` with combining marks stripped.

    The string is decomposed with Unicode NFD normalisation and every
    character whose category is 'Mn' (nonspacing mark) is dropped, so an
    accented letter is reduced to its base letter. Characters that have no
    decomposition are left untouched.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)

def normalizeString(s, label = False):
    """Replace punctuation with spaces, lower-case, trim and strip accents.

    Args:
        s: text to normalise.
        label: when true, ',', ';' and '-' are kept (only the shorter
            punctuation set is blanked); otherwise they are replaced by a
            space like the other punctuation characters.

    Returns:
        The normalised string after `unicodeToAscii`.
    """
    pattern = r"([.!?'’§\/()\[\]%\\…:©])" if label else r"([.!?'’§\/()\[\]%\\…:©,;-])"
    spaced = re.sub(pattern, r" ", s)
    return unicodeToAscii(spaced.lower().strip())

def checkBlank(line):
    """Return True when `line` contains no alphabetic or numeric character.

    An empty string therefore counts as blank.
    """
    return not any(ch.isalpha() or ch.isnumeric() for ch in line)

def splitToLine(words, start, end):
    """Join ``words[start:end]`` into one string with a space after each word.

    ``start`` is raised to 0 and ``end`` is lowered to ``len(words)`` when they
    fall outside the list, so callers may pass windows that overhang either
    edge. An empty range yields ''.
    """
    start = max(start, 0)
    end = min(end, len(words))
    # Index-based on purpose: a negative `end` must give an empty result,
    # which a plain slice would not.
    return ''.join(words[i] + ' ' for i in range(start, end))


def _isRelevantWord(words, index):
    """Tell whether ``words[index]`` should open or extend a context window.

    A word qualifies when it contains the substring 'ihlal', or when it is a
    number below 50 written with at most two characters and neither of the
    next two words is a number above 50 or longer than two characters.
    """
    word = words[index]
    if 'ihlal' in word:
        return True
    if not word.isdigit():
        return False
    try:
        if not (int(word) < 50 and len(word) < 3):
            return False
        for offset in (2, 1):
            if index + offset < len(words):
                following = words[index + offset]
                if following.isdigit() and (int(following) > 50 or len(following) > 2):
                    return False
        return True
    except ValueError:
        # str.isdigit() accepts characters such as superscript digits that
        # int() rejects; such words are simply not treated as numbers.
        return False


def getRel(words):
    """Extract the word windows surrounding the relevant words of a document.

    Every relevant word (see `_isRelevantWord`) at position ``i`` selects the
    window ``[i - 2, i + 5)``. A window that starts before the end of the
    previous one is merged into it. Position 0 is never examined.

    Args:
        words: list of word strings.

    Returns:
        The windows rendered with `splitToLine` and separated by a newline,
        or '' when no word is relevant.
    """
    windows = []  # merged [start, end) pairs, in document order
    for index in range(1, len(words)):
        if not _isRelevantWord(words, index):
            continue
        start, end = index - 2, index + 5
        if windows and start < windows[-1][1]:
            windows[-1][1] = end
        else:
            windows.append([start, end])
    return '\n'.join(splitToLine(words, start, end) for start, end in windows)

In [None]:
# Reserved vocabulary indices.
PAD_token = 0  # padding; also the index ignored by the loss
SOF_token = 1  # start marker prepended to every sequence in tensorFromText
EOF_token = 2  # end marker appended to every sequence in tensorFromText
UNK_token = 3  # fallback for unseen source words; the label vocabulary does not reserve it

class Tokens:
    """Vocabulary that maps words to integer ids and back.

    Ids 0-2 are reserved for "PAD", "SOF" and "EOF". A source vocabulary
    (``label=False``) additionally reserves id 3 for "UNK"; a label
    vocabulary starts assigning real words at id 3.
    """
    def __init__(self, label = False):
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0:"PAD" , 1:"SOF", 2: "EOF"}
        if not label:
            self.index2word[3] = "UNK"
        # Next free id: one past the reserved entries.
        self.n_words = len(self.index2word)

    def addSentence(self, sentence):
        """Register every whitespace-separated word; return how many were seen."""
        words = sentence.split()
        for word in words:
            self.addWord(word)
        return len(words)

    def addWord(self, word):
        """Count `word`, assigning it the next free id on first sight."""
        if word in self.word2index:
            self.word2count[word] += 1
            return
        index = self.n_words
        self.word2index[word] = index
        self.word2count[word] = 1
        self.index2word[index] = word
        self.n_words = index + 1

In [None]:
class CustomDataset(Dataset):
    """Paired document/label text files exposed as index tensors.

    The regular files of `data_dir` and `label_dir` are listed in sorted
    order and paired by position, so both directories are expected to hold
    matching files in the same order.

    Attributes:
        tokens: source vocabulary, built from the `getRel` extract of each document.
        label_tokens: label vocabulary, built from every line of each label file.
        data / label: file paths, in sorted order.
        max_len: longest source extract in tokens, plus 2 for the SOF/EOF markers.
    """
    def __init__(self, data_dir, label_dir, transform=None, target_transform=None):
        self.data_dir = data_dir
        self.label_dir = label_dir
        # Stored only; neither transform is applied anywhere in this class.
        self.transform = transform
        self.target_transform = target_transform
        self.tokens = Tokens()
        self.label_tokens = Tokens(True)
        self.data = []
        self.label = []
        self.max_len = 0
        self.setTokens()

    @staticmethod
    def _readLines(path):
        """Read a UTF-8 text file and return its stripped content split into lines."""
        # The context manager closes the handle right away instead of leaving
        # one open descriptor per file to the garbage collector.
        with open(path, encoding='utf-8') as handle:
            return handle.read().strip().split('\n')

    def setTokens(self):
        """Index the files of both directories and build the two vocabularies."""
        for filename in sorted(os.listdir(self.data_dir)):
            path = os.path.join(self.data_dir, filename)
            if not os.path.isfile(path):
                continue
            self.data.append(path)
            words = []
            for line in self._readLines(path):
                if not checkBlank(line):
                    words += line.split()
            # Only the extract produced by getRel enters the source vocabulary.
            n_tokens = self.tokens.addSentence(getRel(words))
            self.max_len = max(self.max_len, n_tokens)
        self.max_len += 2
        for filename in sorted(os.listdir(self.label_dir)):
            path = os.path.join(self.label_dir, filename)
            if not os.path.isfile(path):
                continue
            self.label.append(path)
            for line in self._readLines(path):
                self.label_tokens.addSentence(line)

    def tensorFromText(self, text, label = False):
        """Convert a list of text lines into a ``[seq len, 1]`` long tensor.

        The sequence is wrapped in SOF_token / EOF_token. Label words are
        looked up directly (an unseen word raises KeyError); source lines are
        first reduced with `getRel` and unseen words map to UNK_token.
        """
        indexes = [SOF_token]
        words = []
        for line in text:
            if checkBlank(line):
                continue
            if label:
                indexes += [self.label_tokens.word2index[word] for word in line.split()]
            else:
                words += line.split()
        if not label:
            extract = getRel(words)
            indexes += [self.tokens.word2index.get(word, UNK_token) for word in extract.split()]
        indexes.append(EOF_token)
        return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)

    def __len__(self):
        # Count the label files that were actually indexed; listing the
        # directory again would also count sub-directories, which
        # __getitem__ cannot serve.
        return len(self.label)

    def __getitem__(self, idx):
        """Return ``(source tensor, target tensor)`` for the idx-th file pair."""
        source = self._readLines(self.data[idx])
        target = self._readLines(self.label[idx])

        return (self.tensorFromText(source), self.tensorFromText(target, True))

In [None]:
# Colab only: mount Google Drive under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!ln -s /content/drive/My\ Drive/ /myDrive
!ls /myDrive

 b2_subdataset_model.pt   b4_subdataset_70f1_model.pt   bestdataset.zip
 b2_subdataset_optim.pt   b4_subdataset_70f1_optim.pt   checkpoints
 b4_atten_model_94f.pt	  b4_subdataset_71f1_model.pt  'Colab Notebooks'
 b4_atten_model.pt	  b4_subdataset_71f1_optim.pt   dataset.zip
 b4_atten_optim_94f.pt	  b4_subdataset_model.pt        GTU
 b4_atten_optim.pt	  b4_subdataset_optim.pt       'My Drive'


In [None]:
# Copy the dataset archive from Drive into the current working directory.
!cp /myDrive/dataset.zip ./

In [None]:
!unzip dataset.zip

In [None]:
# Index the 'data' (documents) and 'label' (targets) directories and build both vocabularies.
dataset = CustomDataset('data', 'label')

In [None]:
# Sanity check of the indexed file paths. This prints every path, so the
# output grows with the dataset size.
print(dataset.data)
print(dataset.label)

['data/AFFAIRE BШL ШNЮAAT TAAHHЪT TШCARET LШMШTED ЮШRKETШ v. TURKEY - [Turkish Translation] by the Turkish Ministry of Justice .txt', 'data/CASE OF  ELIF NAZAN SEKER v. TURKEY  Turkish Translation by the Turkish Ministry of Justice.txt', 'data/CASE OF A B AND C  v. IRELAND  Turkish Translation by the COE Human Rights Trust Fund.txt', 'data/CASE OF A. AND OTHERS v. THE UNITED KINGDOM  Turkish Translation by the COE Human Rights Trust Fund.txt', 'data/CASE OF A. YILMAZ v. TURKEY -  [Turkish Translation] summary by the Turkish Ministry of Foreign Affairs.txt', 'data/CASE OF A.B. v. SWITZERLAND  Turkish Translation legal summary by the Turkish Ministry of Justice.txt', 'data/CASE OF A.C. AND OTHERS v. SPAIN  Turkish Translation legal summary by the Turkish Ministry of Justice.txt', 'data/CASE OF A.D. AND OTHERS v. TURKEY  Turkish Translation by the Turkish Ministry of Justice.txt', 'data/CASE OF A.D. v. TURKEY -  [Turkish Translation] summary by the Turkish Ministry of Foreign Affairs.txt'

In [None]:
# Show the label vocabulary (id -> word), including the reserved PAD/SOF/EOF entries.
print(dataset.label_tokens.index2word)

{0: 'PAD', 1: 'SOF', 2: 'EOF', 3: '6', 4: '-', 5: '1', 6: ',', 7: '13', 8: 'ihlal', 9: 'edildiğine', 10: 'nolu', 11: 'protokolün', 12: '5', 13: '3', 14: '7', 15: 'edilmediğine', 16: '38', 17: 'esas', 18: 've', 19: 'usul', 20: '2', 21: '8', 22: '4', 23: ';', 24: '10', 25: 'c', 26: '14', 27: '11', 28: 'd', 29: '34', 30: '18', 31: 'b', 32: '9', 33: 'etmeyeceğine', 34: 'a', 35: 'edeceğine', 36: '12', 37: 'f', 38: 'e', 39: '17'}


In [None]:
def collate_fn(data, pad_value=0):
    """Pad a list of (source, target) samples into two batch tensors.

    Args:
        data: list of ``(source, target)`` pairs as returned by
            ``CustomDataset.__getitem__``; each element is an integer tensor
            of shape ``[seq len, 1]`` and lengths may differ between samples.
        pad_value: index written into the padded positions. The default 0
            equals PAD_token.

    Returns:
        ``(sources, targets)`` of shape ``[max src len, batch size]`` and
        ``[max trg len, batch size]``. The results keep the dtype and device
        of the input tensors.
    """
    sources = [sample[0] for sample in data]
    targets = [sample[1] for sample in data]

    # pad_sequence stacks time-major: [max len, batch size, 1].
    features = nn.utils.rnn.pad_sequence(sources, padding_value=pad_value)
    target_features = nn.utils.rnn.pad_sequence(targets, padding_value=pad_value)

    # Drop the trailing singleton feature dimension.
    return torch.squeeze(features, dim=2), torch.squeeze(target_features, dim=2)

In [None]:
from torch.utils.data import DataLoader
# Batch size shared by all three loaders. Seq2Seq.forward has a dedicated
# free-running decoding path for batch_size == 1, so this value also selects
# which decoding path the model takes.
# NOTE(review): the 'b4_*' checkpoint names suggest training used batch size 4 — confirm.
#batch_size = 4
batch_size = 1

In [None]:
# Split the sample indices into validation (15%), test (10%) and training (rest).
dataset_size = len(dataset)
indices = list(range(dataset_size))
validation_split = .15
test_split = .1
shuffle_dataset = True
random_seed= 42
v_split = int(np.floor(validation_split * dataset_size))
t_split = int(np.floor(test_split * dataset_size))
if shuffle_dataset :
    # Re-seeds NumPy's global generator so the split is the same on every run.
    np.random.seed(random_seed)
    np.random.shuffle(indices)
# Layout of the shuffled list: [validation | test | train].
train_indices, val_indices, test_indices = indices[v_split + t_split:], indices[:v_split], indices[v_split:v_split + t_split]

# Creating PT data samplers and loaders:
# Each sampler draws only from its own index subset, in random order.
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)
test_sampler = SubsetRandomSampler(test_indices)

train_loader = torch.utils.data.DataLoader(dataset, collate_fn=collate_fn, batch_size=batch_size, 
                                           sampler=train_sampler)
validation_loader = torch.utils.data.DataLoader(dataset, collate_fn=collate_fn, batch_size=batch_size,
                                                sampler=valid_sampler)
test_loader = torch.utils.data.DataLoader(dataset, collate_fn=collate_fn, batch_size=batch_size,
                                                sampler=test_sampler)

In [None]:
# Sanity check of the shuffled training indices (prints the whole list).
print(train_indices)

[2922, 340, 1099, 1543, 748, 2755, 662, 480, 2138, 871, 1453, 2952, 549, 2020, 2708, 742, 2415, 2637, 2566, 2292, 1909, 790, 2120, 1446, 247, 829, 1124, 2720, 626, 347, 278, 1783, 2280, 655, 1213, 838, 2624, 2588, 2350, 1709, 2023, 976, 445, 617, 1474, 1778, 2517, 1244, 2995, 2552, 547, 2210, 1623, 263, 1397, 2353, 1815, 353, 1075, 208, 613, 2090, 2099, 1498, 1135, 2880, 2078, 57, 2483, 811, 2195, 561, 2943, 229, 2509, 1721, 2179, 1450, 1234, 1071, 2849, 1037, 889, 1053, 707, 424, 69, 2249, 2535, 2839, 2406, 2963, 2581, 2092, 940, 620, 2519, 1196, 2067, 1032, 1505, 1576, 2130, 2044, 1395, 279, 1465, 1503, 2282, 2838, 2723, 553, 834, 962, 596, 2451, 802, 2136, 2985, 2719, 440, 508, 76, 2481, 649, 522, 1487, 2208, 1494, 654, 2816, 1538, 1423, 1651, 630, 2541, 1507, 2941, 482, 1927, 1113, 2563, 1088, 438, 1717, 2443, 856, 1983, 166, 59, 611, 530, 2787, 49, 1883, 1011, 858, 1017, 48, 719, 2390, 2652, 2246, 1159, 632, 1338, 2127, 2602, 1436, 25, 2965, 219, 2818, 276, 2157, 1480, 1235, 1593,

In [None]:
class Encoder(nn.Module):
    """Embedding followed by a multi-layer LSTM over the source sequence.

    Args:
        input_dim: size of the source vocabulary.
        emb_dim: embedding width.
        hid_dim: LSTM hidden size.
        n_layers: number of stacked LSTM layers.
        dropout: dropout probability, applied to the embeddings and between
            LSTM layers.
    """
    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout):
        super(Encoder, self).__init__()

        self.hid_dim, self.n_layers = hid_dim, n_layers

        # Attribute names and creation order are kept stable: they define the
        # state_dict keys and the order in which initial weights are drawn.
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout = dropout)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        """Encode `src` of shape ``[src len, batch size]``.

        Returns:
            output: ``[src len, batch size, hid dim]``, top-layer states for every step.
            hidden: ``[n layers, batch size, hid dim]``, final hidden state per layer.
            cell:   ``[n layers, batch size, hid dim]``, final cell state per layer.
        """
        token_vectors = self.embedding(src)
        # rnn_input = [src len, batch size, emb dim]
        rnn_input = self.dropout(token_vectors)

        output, state = self.rnn(rnn_input)
        hidden, cell = state
        return output, hidden, cell

In [None]:
class Attn(nn.Module):
    """Dot-product attention of the top decoder state over the encoder outputs.

    With ``method == 'general'`` the encoder outputs are first passed through
    a learned linear layer; the transformed outputs are then used both for
    scoring and for building the context vector. Any other `method` scores
    against the raw encoder outputs.
    """
    def __init__(self, method, hid_dim):
        super().__init__()

        self.method = method
        self.hid_dim = hid_dim

        if method == 'general':
            self.linear_in = nn.Linear(hid_dim, hid_dim)

        self.softmax = nn.Softmax(dim=-1)

    def forward(self, hidden, encoder_outputs):
        """Attend over the encoder outputs.

        Args:
            hidden: ``[n layers, batch size, hid dim]``; only the last layer is used.
            encoder_outputs: ``[src len, batch size, hid dim]``.

        Returns:
            combined: ``[batch size, 1, hid dim * 2]``, context vector
                concatenated with the query state.
            attention_weights: ``[batch size, 1, src len]``.
        """
        # keys = [batch size, src len, hid dim]
        keys = encoder_outputs.transpose(0, 1)
        batch_size, output_len, dimensions = keys.size()

        if self.method == "general":
            flat_keys = keys.reshape(batch_size * output_len, dimensions)
            keys = self.linear_in(flat_keys).reshape(batch_size, output_len, dimensions)

        # query = [batch size, 1, hid dim]
        query = hidden[-1].unsqueeze(1)

        # scores = [batch size, 1, src len]
        scores = torch.bmm(query, keys.transpose(1, 2).contiguous())
        normalised = self.softmax(scores.view(batch_size, output_len))
        attention_weights = normalised.view(batch_size, 1, output_len)

        # context = [batch size, 1, hid dim]
        context = torch.bmm(attention_weights, keys)

        combined = torch.cat((context, query), dim=2)
        return combined, attention_weights

In [None]:
class AttnDecoderRNN(nn.Module):
    """Single-step LSTM decoder whose output layer reads an attention summary.

    Args:
        attn_model: attention method handed to `Attn`. With 'none' no
            attention module is created, although `forward` still uses
            ``self.attn``.
        output_dim: size of the label vocabulary.
        emb_dim: base embedding width; the embedding actually used is twice as wide.
        hid_dim: LSTM hidden size.
        n_layers: number of stacked LSTM layers.
        dropout: dropout probability for the embeddings and between LSTM layers.
    """
    def __init__(self, attn_model, output_dim, emb_dim, hid_dim, n_layers, dropout):
        super(AttnDecoderRNN, self).__init__()

        self.attn_model = attn_model
        self.output_dim = output_dim
        self.hid_dim = hid_dim
        self.emb_dim = emb_dim
        self.n_layers = n_layers

        embedding_width = emb_dim*2
        self.embedding = nn.Embedding(output_dim, embedding_width)
        self.rnn = nn.LSTM(embedding_width, hid_dim, n_layers, dropout = dropout)

        # fc_out consumes [context ; top hidden state], hence hid_dim*2 inputs.
        self.fc_out = nn.Linear(hid_dim*2, output_dim)
        self.dropout = nn.Dropout(dropout)
        if attn_model != 'none':
            self.attn = Attn(attn_model, hid_dim)

    def forward(self, input, hidden, cell, encoder_outputs):
        """Decode one time step.

        Args:
            input: ``[batch size]``, previous token ids.
            hidden, cell: ``[n layers, batch size, hid dim]``, LSTM state.
            encoder_outputs: ``[src len, batch size, hid dim]``.

        Returns:
            output: ``[batch size, output dim]``, unnormalised scores.
            hidden, cell: the updated LSTM state.
            attn_weights: ``[batch size, 1, src len]``.
        """
        # step_input = [1, batch size]
        step_input = input.unsqueeze(0)

        # embedded = [1, batch size, emb dim * 2]
        embedded = self.dropout(self.embedding(step_input))

        # The per-step RNN output is not needed: attention reads the top layer of `hidden`.
        _, (hidden, cell) = self.rnn(embedded, (hidden, cell))

        # combined = [batch size, 1, hid dim * 2]
        combined, attn_weights = self.attn(hidden, encoder_outputs)

        output = self.fc_out(combined.squeeze(1))

        return output, hidden, cell, attn_weights

In [None]:
class Seq2Seq(nn.Module):
    """Encoder/decoder wrapper with two decoding modes.

    * Teacher-forced decoding over exactly ``trg len`` steps. Used while
      training, and in evaluation mode whenever the batch holds more than
      one sample.
    * Free-running greedy decoding until EOF_token is produced. Used only in
      evaluation mode with a batch of one sample; the output length is then
      decided by the decoder, not by `trg`.
    """
    def __init__(self, encoder, decoder, device):
        super().__init__()

        self.encoder = encoder
        self.decoder = decoder
        self.device = device

        assert encoder.hid_dim == decoder.hid_dim, \
            "Hidden dimensions of encoder and decoder must be equal!"
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder and decoder must have equal number of layers!"

    def forward(self, src, trg, teacher_forcing_ratio = 0.5, max_decode_len = 200):
        """Run the model.

        Args:
            src: ``[src len, batch size]`` source token ids.
            trg: ``[trg len, batch size]`` target token ids; row 0 holds the
                start tokens fed to the decoder first.
            teacher_forcing_ratio: probability of feeding the ground-truth
                token instead of the prediction at each teacher-forced step
                (e.g. 0.75 uses ground truth 75% of the time).
            max_decode_len: upper bound on the number of greedy decoding
                steps, so decoding stops even if EOF_token is never predicted.

        Returns:
            Scores of shape ``[steps, batch size, output dim]``. Row 0 is an
            all-zero placeholder for the start token. ``steps`` equals
            ``trg len`` in teacher-forced mode and ``1 + decoded steps`` in
            greedy mode.
        """
        batch_size = trg.shape[1]
        trg_len = trg.shape[0]
        trg_vocab_size = self.decoder.output_dim

        # last hidden state of the encoder is used as the initial hidden state of the decoder
        encoder_outputs, hidden, cell = self.encoder(src)

        # first input to the decoder is the start tokens
        input = trg[0,:]

        # `self.training` is the flag toggled by train()/eval(). The bound
        # method `self.eval` is always truthy and would send training batches
        # of size 1 down the greedy path.
        if not self.training and batch_size == 1:
            steps = [torch.zeros(1, batch_size, trg_vocab_size, device = self.device)]
            for _ in range(max_decode_len):
                output, hidden, cell, attn_weights = self.decoder(input, hidden, cell, encoder_outputs)
                # get the highest predicted token from our predictions
                top1 = output.argmax(1)

                steps.append(output.unsqueeze(0))

                if top1.item() == EOF_token:
                    break
                input = top1
            # Concatenate once rather than re-allocating the tensor every step.
            outputs = torch.cat(steps, dim=0)
        else:
            # tensor to store decoder outputs
            outputs = torch.zeros(trg_len, batch_size, trg_vocab_size, device = self.device)
            for t in range(1, trg_len):
                # insert input token embedding, previous hidden and previous cell states
                # receive output tensor (predictions) and new hidden and cell states
                output, hidden, cell, attn_weights = self.decoder(input, hidden, cell, encoder_outputs)

                # place predictions in a tensor holding predictions for each token
                outputs[t] = output

                # decide if we are going to use teacher forcing or not
                teacher_force = random.random() < teacher_forcing_ratio

                # get the highest predicted token from our predictions
                top1 = output.argmax(1)

                # if teacher forcing, use actual next token as next input
                # if not, use predicted token
                input = trg[t] if teacher_force else top1

        return outputs

In [None]:
# Vocabulary sizes come from the dataset; the remaining values are model hyper-parameters.
INPUT_DIM = dataset.tokens.n_words
OUTPUT_DIM = dataset.label_tokens.n_words
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256  # AttnDecoderRNN doubles this for its embedding width
HID_DIM = 512
N_LAYERS = 2
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
# 'general' selects the attention variant with a learned linear layer (see Attn).
dec = AttnDecoderRNN('general', OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)

model = Seq2Seq(enc, dec, device).to(device)

In [None]:
def init_weights(m):
    """Initialise the parameters owned directly by module `m` from U(-0.08, 0.08).

    Intended to be passed to ``nn.Module.apply``, which already visits every
    sub-module. ``recurse=False`` makes sure each parameter is drawn exactly
    once instead of once per ancestor module. Called directly on a module,
    it therefore leaves the parameters of its children untouched.
    """
    for name, param in m.named_parameters(recurse=False):
        nn.init.uniform_(param.data, -0.08, 0.08)
        
# Module.apply calls init_weights on every sub-module and then on the model itself.
model.apply(init_weights)

In [None]:
def count_parameters(model):
    """Return the total number of elements across the model's trainable parameters."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Report the model size with thousands separators.
print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 21,307,944 trainable parameters


In [None]:
# Adam over all model parameters with an explicit learning rate.
#optimizer = optim.Adam(model.parameters())
optimizer = optim.Adam(model.parameters(), lr = 0.001)

In [None]:
# Target positions equal to PAD_token do not contribute to the loss.
criterion = nn.CrossEntropyLoss(ignore_index=PAD_token)

In [None]:
def train(model, iterator, optimizer, criterion, clip):
    """Train `model` for one pass over `iterator`.

    Args:
        model: called as ``model(src, trg)``; must return scores of shape
            ``[trg len, batch size, output dim]``.
        iterator: yields ``(src, trg)`` batches and supports ``len()``.
        optimizer: optimiser stepped once per batch.
        criterion: loss applied to the flattened scores and targets.
        clip: maximum gradient norm.

    Returns:
        The mean per-batch loss as a Python float.
    """
    model.train()

    running_loss = 0
    for batch in iterator:
        src, trg = batch

        optimizer.zero_grad()
        output = model(src, trg)

        # Position 0 holds the start token and is excluded from the loss.
        # flat_output = [(trg len - 1) * batch size, output dim]
        # flat_trg    = [(trg len - 1) * batch size]
        vocab_size = output.shape[-1]
        flat_output = output[1:].view(-1, vocab_size)
        flat_trg = trg[1:].reshape(-1)

        loss = criterion(flat_output, flat_trg)
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()

        running_loss += loss.item()

        # Release the batch tensors before the next iteration.
        del output, flat_output, flat_trg, loss, batch, src, trg
        torch.cuda.empty_cache()
        gc.collect()

    return running_loss / len(iterator)

In [None]:
def evaluate(model, iterator, criterion):
    """Compute the mean per-batch loss of `model` over `iterator` without gradients.

    The model is called as ``model(src, trg, 0)``, i.e. with teacher forcing
    turned off. Its output must have the same number of steps as `trg` for
    the loss to be computable.

    Returns:
        The mean per-batch loss as a Python float.
    """
    model.eval()

    total_loss = 0

    with torch.no_grad():
        for src, trg in iterator:
            scores = model(src, trg, 0)

            # Position 0 holds the start token and is excluded from the loss.
            # flat_scores = [(trg len - 1) * batch size, output dim]
            # flat_trg    = [(trg len - 1) * batch size]
            vocab_size = scores.shape[-1]
            flat_scores = scores[1:].view(-1, vocab_size)
            flat_trg = trg[1:].reshape(-1)

            total_loss += criterion(flat_scores, flat_trg).item()

    return total_loss / len(iterator)

In [None]:
def epoch_time(start_time, end_time):
    """Split the interval between two timestamps (in seconds) into whole minutes and seconds.

    Both parts are truncated toward zero.
    """
    elapsed = end_time - start_time
    minutes = int(elapsed / 60)
    seconds = int(elapsed - minutes * 60)
    return minutes, seconds

In [None]:
N_EPOCHS = 30
CLIP = 1  # maximum gradient norm used by train()

# Best-checkpoint tracking is currently disabled (see the commented block in
# the loop), so best_valid_loss is never updated.
best_valid_loss = float('inf')
#best_valid_loss = best_loss
# Running epoch counter, used for logging only.
# NOTE(review): the recorded output starts at epoch 31, presumably from a
# resumed session in which last_epoch was not reset — confirm.
last_epoch = 0

for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    
    train_loss = train(model, train_loader, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, validation_loader, criterion)
    
    end_time = time.time()
    
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    #if valid_loss < best_valid_loss:
    #    best_valid_loss = valid_loss
    #    torch.save(model.state_dict(), '/myDrive/atten_best_model.pt')
    #    torch.save(optimizer.state_dict(), '/myDrive/atten_best_optim.pt')
    # The latest weights overwrite the same two files after every epoch,
    # whether or not the validation loss improved.
    torch.save(model.state_dict(), '/myDrive/b4_atten_model.pt')
    torch.save(optimizer.state_dict(), '/myDrive/b4_atten_optim.pt')
    
    last_epoch += 1
    #print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'Epoch: {last_epoch:02} | Time: {epoch_mins}m {epoch_secs}s')
    # PPL (perplexity) is exp(loss).
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

Epoch: 31 | Time: 2m 51s
	Train Loss: 0.074 | Train PPL:   1.077
	 Val. Loss: 1.301 |  Val. PPL:   3.674
Epoch: 32 | Time: 2m 51s
	Train Loss: 0.058 | Train PPL:   1.060
	 Val. Loss: 1.547 |  Val. PPL:   4.699
Epoch: 33 | Time: 2m 52s
	Train Loss: 0.083 | Train PPL:   1.087
	 Val. Loss: 1.443 |  Val. PPL:   4.233
Epoch: 34 | Time: 2m 51s
	Train Loss: 0.068 | Train PPL:   1.070
	 Val. Loss: 1.448 |  Val. PPL:   4.254
Epoch: 35 | Time: 2m 52s
	Train Loss: 0.061 | Train PPL:   1.063
	 Val. Loss: 1.808 |  Val. PPL:   6.101
Epoch: 36 | Time: 2m 53s
	Train Loss: 0.075 | Train PPL:   1.078
	 Val. Loss: 1.635 |  Val. PPL:   5.132
Epoch: 37 | Time: 2m 52s
	Train Loss: 0.069 | Train PPL:   1.071
	 Val. Loss: 1.664 |  Val. PPL:   5.280
Epoch: 38 | Time: 2m 50s
	Train Loss: 0.070 | Train PPL:   1.073
	 Val. Loss: 1.590 |  Val. PPL:   4.903
Epoch: 39 | Time: 2m 50s
	Train Loss: 0.048 | Train PPL:   1.049
	 Val. Loss: 1.689 |  Val. PPL:   5.413
Epoch: 40 | Time: 2m 51s
	Train Loss: 0.067 | Train PPL

In [None]:
def evaluateRandomly(model):
    """Print the label and the model's prediction for every batch of `test_loader`.

    Only the first sample of each batch is shown. The predicted sequence
    starts with the placeholder row of the model output, whose argmax is
    index 0 ("PAD"). The number of batches is printed at the end.
    """
    index2word = dataset.label_tokens.index2word
    shown = 0
    model.eval()
    with torch.no_grad():
        for x, y in test_loader:
            label_ids = [step[0].item() for step in y]
            label_text = ' '.join([index2word[idx] for idx in label_ids])
            print("LABEL: ", label_text)

            outputs = model(x, y, teacher_forcing_ratio=0)
            predicted_ids = [torch.argmax(step[0]) for step in outputs]
            predicted_text = ' '.join([index2word[idx.item()] for idx in predicted_ids])
            print("PREDICTION: ", predicted_text)
            print()
            shown += 1
    print(shown)

In [None]:
def runTime(model, loader):
    """Print the average per-batch inference time of `model` over `loader`.

    Two averages are reported: "model time" covers the forward pass only,
    "system time" additionally covers decoding the first sample of the batch
    back into words.

    NOTE(review): on a CUDA device the forward call may return before the GPU
    has finished, so the split between the two figures is approximate.

    Args:
        model: called as ``model(x, y, teacher_forcing_ratio=0)``.
        loader: iterable of ``(x, y)`` batches to time.
    """
    model.eval()
    model_total_time = 0
    system_total_time = 0
    index2word = dataset.label_tokens.index2word

    batches = 0
    with torch.no_grad():
        # Iterate the loader that was passed in, not the global test_loader.
        for x, y in loader:
            start_time = time.perf_counter()
            outputs = model(x, y, teacher_forcing_ratio=0)
            model_end_time = time.perf_counter()
            output_idx = [torch.argmax(step[0]) for step in outputs]
            decoded = ' '.join([index2word[idx.item()] for idx in output_idx])
            system_end_time = time.perf_counter()

            model_total_time += model_end_time - start_time
            system_total_time += system_end_time - start_time
            batches += 1

    if batches == 0:
        # Nothing was timed; avoid dividing by zero.
        print("No batches to time.")
        return

    print("Average model time: ", model_total_time/batches, "   Average system time:  ", system_total_time/batches)

In [None]:
def calcRouge1(pred, target):
    """Unigram-overlap precision, recall and F1 between two token lists.

    Matches are clipped: a token counts at most as often as it occurs in both
    lists, so precision and recall always stay within [0, 1].

    Args:
        pred: predicted tokens.
        target: reference tokens.

    Returns:
        ``(precision, recall, f1)``; all three are 0.0 when either list is
        empty or nothing overlaps.
    """
    from collections import Counter

    if not pred or not target:
        return 0.0, 0.0, 0.0
    # Multiset intersection keeps the minimum count of every shared token.
    overlap = sum((Counter(pred) & Counter(target)).values())
    if overlap == 0:
        return 0.0, 0.0, 0.0
    prec = overlap / len(pred)
    rec = overlap / len(target)
    f1 = 2 * prec * rec / (prec + rec)
    return prec, rec, f1

In [None]:
def calcRouge2(pred, target):
    """Bigram-overlap precision, recall and F1 between two token lists.

    Matches are clipped: a bigram counts at most as often as it occurs in
    both lists, so precision and recall always stay within [0, 1].

    Args:
        pred: predicted tokens.
        target: reference tokens.

    Returns:
        ``(precision, recall, f1)``; all three are 0.0 when either list has
        fewer than two tokens or no bigram overlaps.
    """
    from collections import Counter

    pred_bigrams = list(zip(pred, pred[1:]))
    target_bigrams = list(zip(target, target[1:]))
    if not pred_bigrams or not target_bigrams:
        return 0.0, 0.0, 0.0
    # Multiset intersection keeps the minimum count of every shared bigram.
    overlap = sum((Counter(pred_bigrams) & Counter(target_bigrams)).values())
    if overlap == 0:
        return 0.0, 0.0, 0.0
    prec = overlap / len(pred_bigrams)
    rec = overlap / len(target_bigrams)
    f1 = 2 * prec * rec / (prec + rec)
    return prec, rec, f1

In [None]:
def calcRouge3(pred, target):
    """Trigram-overlap precision, recall and F1 between two token lists.

    Matches are clipped: a trigram counts at most as often as it occurs in
    both lists, so precision and recall always stay within [0, 1].

    Args:
        pred: predicted tokens.
        target: reference tokens.

    Returns:
        ``(precision, recall, f1)``; all three are 0.0 when either list has
        fewer than three tokens or no trigram overlaps.
    """
    from collections import Counter

    pred_trigrams = list(zip(pred, pred[1:], pred[2:]))
    target_trigrams = list(zip(target, target[1:], target[2:]))
    if not pred_trigrams or not target_trigrams:
        return 0.0, 0.0, 0.0
    # Multiset intersection keeps the minimum count of every shared trigram.
    overlap = sum((Counter(pred_trigrams) & Counter(target_trigrams)).values())
    if overlap == 0:
        return 0.0, 0.0, 0.0
    prec = overlap / len(pred_trigrams)
    rec = overlap / len(target_trigrams)
    f1 = 2 * prec * rec / (prec + rec)
    return prec, rec, f1

In [None]:
def evaluateRouge(model, loader, num):
    """Average ROUGE-`num` precision, recall and F1 of `model` over `loader`.

    For every sample the reference and the greedy prediction are cut at the
    first EOF_token, the PAD/SOF/EOF ids are removed, and the remaining ids
    are mapped to words of the label vocabulary. Scores are averaged within
    each batch and then across batches.

    Args:
        model: called as ``model(src, trg, teacher_forcing_ratio=0)``.
        loader: iterable of ``(src, trg)`` batches, both ``[len, batch size]``.
        num: n-gram order, 1 to 3.

    Returns:
        ``(precision, recall, f1)``; ``(0, 0, 0)`` when `num` is out of range
        or the loader yields no batch.
    """
    if num < 1 or num > 3:
        return 0, 0, 0
    scorer = {1: calcRouge1, 2: calcRouge2, 3: calcRouge3}[num]
    special = (PAD_token, SOF_token, EOF_token)
    index2word = dataset.label_tokens.index2word

    def contentWords(token_ids):
        """Map ids to words up to the first EOF_token, skipping special ids."""
        words = []
        for tok in token_ids:
            if tok == EOF_token:
                break
            if tok not in special:
                words.append(index2word[tok])
        return words

    batches = 0
    prec = 0
    rec = 0
    f1 = 0
    model.eval()
    with torch.no_grad():
        for src, trg in loader:
            batch_size = trg.shape[1]

            references = [contentWords(step[i].item() for step in trg)
                          for i in range(batch_size)]

            outputs = model(src, trg, teacher_forcing_ratio=0)

            predictions = [contentWords(torch.argmax(step[i]).item() for step in outputs)
                           for i in range(batch_size)]

            batch_prec = 0
            batch_rec = 0
            batch_f1 = 0
            for predicted, reference in zip(predictions, references):
                sample_prec, sample_rec, sample_f1 = scorer(predicted, reference)
                batch_prec += sample_prec
                batch_rec += sample_rec
                batch_f1 += sample_f1

            prec += batch_prec / batch_size
            rec += batch_rec / batch_size
            f1 += batch_f1 / batch_size
            batches += 1

    if batches == 0:
        # An empty loader has nothing to average.
        return 0, 0, 0
    return prec/batches, rec/batches, f1/batches

In [None]:
#torch.save(model.state_dict(), '/myDrive/b4_atten_model_94f.pt')
#torch.save(optimizer.state_dict(), '/myDrive/b4_atten_optim_94f.pt')

In [None]:
# ROUGE-1 (precision, recall, F1) on the test split.
evaluateRouge(model, test_loader, 1)

(0.9769960930881985, 0.941008118637429, 0.9446649858147844)

In [None]:
# ROUGE-2 (precision, recall, F1) on the test split.
evaluateRouge(model, test_loader, 2)

(0.9061002155796272, 0.8813883542020795, 0.8808833470631657)

In [None]:
# ROUGE-3 (precision, recall, F1) on the test split.
evaluateRouge(model, test_loader, 3)

(0.839387001614943, 0.8245752174239017, 0.8225532416674057)

In [None]:
# Average per-batch inference time on the test split.
runTime(model, test_loader)

Average model time:  0.03120887279510498    Average system time:   0.03150530497233073


In [None]:
# Restore the saved '94f' model and optimizer state onto the current device.
# NOTE(review): this cell comes after the metric cells, so a top-to-bottom run
# scores the freshly trained weights rather than this checkpoint — confirm
# the intended order.
# torch.load unpickles the file: only load checkpoints from a trusted source.
model.load_state_dict(torch.load('/myDrive/b4_atten_model_94f.pt', map_location=device))
optimizer.load_state_dict(torch.load('/myDrive/b4_atten_optim_94f.pt', map_location=device))

In [None]:
# Print label/prediction pairs for the test split.
evaluateRandomly(model)

LABEL:  SOF 6 - 1 ihlal edildiğine EOF
PREDICTION:  PAD 6 - 1 ihlal edildiğine EOF

LABEL:  SOF 1 nolu protokolün 1 , 8 , 13 ihlal edildiğine EOF
PREDICTION:  PAD 8 ihlal edildiğine EOF

LABEL:  SOF 6 - 1 , 6 - 3 d ihlal edildiğine EOF
PREDICTION:  PAD 6 - 1 ihlal edildiğine EOF

LABEL:  SOF 1 nolu protokolün 1 ihlal edildiğine EOF
PREDICTION:  PAD 1 nolu protokolün 1 ihlal edildiğine EOF

LABEL:  SOF 6 - 1 ihlal edilmediğine EOF
PREDICTION:  PAD 6 - 1 ihlal edilmediğine EOF

LABEL:  SOF 10 ihlal edildiğine EOF
PREDICTION:  PAD 10 ihlal edildiğine EOF

LABEL:  SOF 6 - 1 ihlal edildiğine EOF
PREDICTION:  PAD 6 - 1 ihlal edildiğine EOF

LABEL:  SOF 10 , 6 - 1 ihlal edildiğine ; 14 ihlal edilmediğine EOF
PREDICTION:  PAD 10 , 6 - 1 ihlal edildiğine ; 14 ihlal edilmediğine EOF

LABEL:  SOF 6 - 1 , 1 nolu protokolün 1 ihlal edildiğine EOF
PREDICTION:  PAD 6 - 1 , 1 nolu protokolün 1 ihlal edildiğine EOF

LABEL:  SOF 6 - 1 ihlal edildiğine EOF
PREDICTION:  PAD 6 - 1 ihlal edildiğine EOF

LAB