In [1]:
import os
import math
import string

import torch
import torch.nn as nn
import torch.optim as optim

from torchtext.legacy.data import Field, BucketIterator
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, Dataset

import nltk
import numpy as np
import pandas as pd
from tqdm import tqdm
import fasttext
import sklearn

import models
from trtokenizer.tr_tokenizer import SentenceTokenizer, WordTokenizer

import unicodedata
import re

In [2]:
DATAFRAME_PATH = './dataframes'

In [3]:
"""
news_df = pd.read_csv('dataframes/wmt-news.csv')
news_df = news_df.drop('partition', axis = 1)
news_df.en = news_df.en.apply(lambda x: str(x).replace('\n', ''))
news_df.tr = news_df.tr.apply(lambda x: str(x).replace('\n', ''))
news_df

train_val_df = news_df.sample(9000, random_state = 7)
test_df = news_df[~news_df.index.isin(train_val_df.index)]

train_df = train_val_df.sample(8500, random_state = 7)
valid_df = train_val_df[~train_val_df.index.isin(train_df.index)]

len(train_df), len(valid_df), len(test_df)

iwslt_df = pd.read_csv('dataframes/iwslt14.csv')

iwslt_train_val_df = iwslt_df.sample(2000, random_state = 7)
iwslt_test_df = iwslt_df[~iwslt_df.index.isin(iwslt_train_val_df.index)]

iwslt_train_df = iwslt_train_val_df.sample(1500, random_state = 7)
iwslt_valid_df = iwslt_train_val_df[~iwslt_train_val_df.index.isin(iwslt_train_df.index)]

len(iwslt_train_df), len(iwslt_valid_df), len(iwslt_test_df)

train_df['split'] = 'train'
valid_df['split'] = 'validation'
test_df['split']  = 'test'

iwslt_train_df['split'] = 'train'
iwslt_valid_df['split'] = 'validation'
iwslt_test_df['split'] = 'test'

df = pd.concat([train_df, valid_df, test_df, iwslt_train_df, iwslt_valid_df, iwslt_test_df])
df = df.reset_index(drop= True)
df.to_csv('dataframes/combined-iwslt-news.csv')
""";

In [4]:
"""
df = pd.read_csv(os.path.join('dataframes', 'wmt16.csv'))
df.split.unique()

# Turn a Unicode string to plain ASCII, thanks to
# https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters
def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s

with open('en-fr-data/eng-fra.txt') as f:
    lines = f.read().strip().split('\n')
    lines = [line.split('\t') for line in lines]

train, test = sklearn.model_selection.train_test_split(lines, test_size = 0.1, train_size = 0.9)
train, valid = sklearn.model_selection.train_test_split(train, test_size = 0.1, train_size = 0.9)

en_samples = []
fr_samples = []

for sample in test:
    en, fr = sample

    en = normalizeString(en)
    fr = normalizeString(fr)

    en_samples.append(en)
    fr_samples.append(fr)

def create_df(en_samples, fr_samples, split:str):
    train_df = pd.DataFrame({'en': [], 'fr': [], 'split': []})

    train_df.en = en_samples
    train_df.fr = fr_samples
    train_df.split = split
    return train_df

train_df = create_df(en_samples, fr_samples, 'train')

valid_df = create_df(en_samples, fr_samples, 'validation')

test_df = create_df(en_samples, fr_samples, 'test')

pd.concat([train_df, valid_df, test_df]).reset_index(drop = True).to_csv('en_fr.csv')
""";

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
def save_checkpoint(model, optimizer, loss, epoch, path):
    checkpoint = {
        "epoch" : epoch,
        "loss" : loss,
        "model_state_dict" : model.state_dict(),
        "optimizer_state_dict" : optimizer.state_dict(),
    }

    torch.save(checkpoint, path)

def load_checkpoint(model, path):
    checkpoint = torch.load(path, map_location = device)

    model.load_state_dict(checkpoint["model_state_dict"])
    model = model.to(device)

    epoch = checkpoint["epoch"]
    loss = checkpoint["loss"]

    return epoch, loss

## Train Definitions

In [7]:
MAX_EPOCH  = 30
BATCH_SIZE = 12
INITIAL_LR = 0.001
MODEL_TYPE = 'recurrent'

CLIP = 5 # ??
TRAIN = True

## Data Definitions

In [8]:
DATAFRAME_PATH = './dataframes'

In [9]:
tr_word_tokenizer = WordTokenizer()

def en_tokenizer(text: str) -> list:
    return nltk.word_tokenize(text, language = 'english')

def tr_tokenizer(text: str) -> list:
    return tr_word_tokenizer.tokenize(text)

def fr_tokenizer(text: str) -> list:
    return nltk.word_tokenize(text, language = 'french')

In [10]:
df = pd.read_csv(os.path.join(DATAFRAME_PATH, 'combined-iwslt-news.csv'))

train_df = df[df.split == 'train']
valid_df = df[df.split == 'validation']

valid_df = valid_df.reset_index(drop = True)
train_df = train_df.reset_index(drop = True)

In [11]:
wmt_df = pd.read_csv(os.path.join(DATAFRAME_PATH, 'wmt16.csv'))
wmt_train = wmt_df[wmt_df.split == 'train']
wmt_train = wmt_train.sample(5000, random_state = 7)

In [12]:
#train_df = pd.concat([train_df, wmt_train]).reset_index(drop = True)

### Build Vocabulary

In [13]:
class NMTDataset(Dataset):
    def __init__(self, en_series, tr_series, en_vocab, tr_vocab):
        self.en_series = en_series
        self.tr_series = tr_series
        self.en_vocab = en_vocab
        self.tr_vocab = tr_vocab

        assert len(en_series) == len(tr_series)
        self.ds_len = len(self.en_series)
        self.data = []

        self.convert_text_to_tokens()

    def __len__(self):
        return self.ds_len

    def __getitem__(self, indx):
        sample = self.data[indx]
        return {'src': sample[0], 'trg': sample[1]}

    def convert_text_to_tokens(self):
        for i in range(len(self.en_series)):
            en_tensor = torch.tensor([self.en_vocab[token] for token in self.en_series[i]], dtype = torch.long)
            tr_tensor = torch.tensor([self.tr_vocab[token] for token in self.tr_series[i]], dtype = torch.long)
            self.data.append((en_tensor, tr_tensor))

In [14]:
#tr_fasttext = fasttext.load_model('cc.tr.300.bin')

In [15]:
def apply_preprocess(x, field):
    def apply_token_replacement(token):
        return token.replace('.', '').replace(',', '').replace('“', '')

    out = field.preprocess(str(x))
    return [str(token.translate(str.maketrans('', '', string.punctuation))) for token in out]
    #return [apply_token_replacement(token) for token in out]
    return out

In [16]:
def get_corpora_dataset(en_text: str, tr_text: str, en_vocab, tr_vocab):
    data = []

    for i in range(len(en_text)):
        en_tensor = torch.tensor([en_vocab[token] for token in en_text[i]], dtype = torch.long)
        tr_tensor = torch.tensor([tr_vocab[token] for token in tr_text[i]], dtype = torch.long)
        data.append((en_tensor, tr_tensor))

    return data

def generate_batch(data_batch):
    en_batch, tr_batch = [], []

    for (en_item, tr_item) in data_batch:
        en_batch.append(torch.cat([torch.tensor([SOS_IDX]), en_item, torch.tensor([EOS_IDX])], dim=0))
        tr_batch.append(torch.cat([torch.tensor([SOS_IDX]), tr_item, torch.tensor([EOS_IDX])], dim=0))

    en_batch = pad_sequence(en_batch, padding_value=PAD_IDX)
    tr_batch = pad_sequence(tr_batch, padding_value=PAD_IDX)

    return en_batch, tr_batch

In [17]:
def pad_batch(data_batch):
    en_batch, tr_batch = [], []

    for item in data_batch:
        en_item = item['src']
        tr_item = item['trg']

        en_batch.append(torch.cat([torch.tensor([SOS_IDX]), en_item, torch.tensor([EOS_IDX])], dim=0))
        tr_batch.append(torch.cat([torch.tensor([SOS_IDX]), tr_item, torch.tensor([EOS_IDX])], dim=0))

    en_batch = pad_sequence(en_batch, padding_value=PAD_IDX)
    tr_batch = pad_sequence(tr_batch, padding_value=PAD_IDX)

    return en_batch, tr_batch

In [18]:
en_field = Field(tokenize = en_tokenizer, init_token='<sos>', eos_token='<eos>')
tr_field  = Field(tokenize = tr_tokenizer, init_token='<sos>', eos_token='<eos>')

# get preprocessed train data
en_train_preprocessed_text = train_df['en'].apply(lambda x: apply_preprocess(x, en_field))
tr_train_preprocessed_text = train_df['tr'].apply(lambda x: apply_preprocess(x, tr_field))

# get preprocessed train data
en_valid_preprocessed_text = valid_df['en'].apply(lambda x: apply_preprocess(x, en_field))
tr_valid_preprocessed_text = valid_df['tr'].apply(lambda x: apply_preprocess(x, tr_field))

# build vocabulary for the languages
en_field.build_vocab(pd.concat([en_train_preprocessed_text, en_valid_preprocessed_text]), min_freq = 2)
tr_field.build_vocab(pd.concat([tr_train_preprocessed_text, tr_valid_preprocessed_text]), min_freq = 2)

en_vocab = en_field.vocab
tr_vocab = tr_field.vocab

# define tags
# only taking values from tr since they are equal in en_vocab as well
PAD_IDX = tr_vocab['<pad>']
SOS_IDX = tr_vocab['<sos>']
EOS_IDX = tr_vocab['<eos>']

In [19]:
if TRAIN:
    train_dataset = NMTDataset(en_train_preprocessed_text, tr_train_preprocessed_text, en_vocab, tr_vocab)
    valid_dataset = NMTDataset(en_valid_preprocessed_text, tr_valid_preprocessed_text, en_vocab, tr_vocab)

    train_loader = BucketIterator(train_dataset, batch_size = BATCH_SIZE, 
                                                        sort_key = (lambda x: (len(x['src']) + len(x['trg']))), 
                                                        repeat=True,  sort=False,  shuffle=True, 
                                                        sort_within_batch=True)

    valid_loader = BucketIterator(valid_dataset, batch_size = BATCH_SIZE, 
                                                        sort_key = (lambda x: (len(x['src']) + len(x['trg']))), 
                                                        repeat=True,  sort=False,  shuffle=False, 
                                                        sort_within_batch=True)

    """

    #train_dataset = get_corpora_dataset(en_train_preprocessed_text, tr_train_preprocessed_text, en_vocab, tr_vocab)
    #valid_dataset = get_corpora_dataset(en_valid_preprocessed_text, tr_valid_preprocessed_text, en_vocab, tr_vocab)

    #train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn = generate_batch)
    #valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn = generate_batch)

    """

In [20]:
len(tr_vocab.itos[4:]), len(en_vocab.itos[4:])

(9195, 7067)

In [21]:
' '.join(en_train_preprocessed_text[1])

'Rebull and her colleagues detailed their latest analysis of Pleiades spin rates in three new papers  soon to be published in the Astronomical Journal '

In [22]:
' '.join(tr_train_preprocessed_text[1])

'Rebull ve meslektaşları Ülker takım yıldızının dönüş hızları ile ilgili en yeni analizlerini kısa süre içinde Astronomi Dergisi nde yayınlanacak üç yeni makalede ayrıntılı olarak açıklamıştır '

## Model Definitions

In [23]:
def xavier_init_weights(m):
    if type(m) == nn.Linear:
        nn.init.xavier_uniform_(m.weight)
    if type(m) == nn.LSTM:
        for param in m._flat_weights_names:
            if "weight" in param:
                nn.init.xavier_uniform_(m._parameters[param])

In [24]:
INPUT_DIM = len(en_vocab)
OUTPUT_DIM = len(tr_vocab)

# embedding hyperparams
ENC_EMB_DIM = 512
DEC_EMB_DIM = 512

ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

# model hyperparams
if MODEL_TYPE == 'attention':
    ENC_HID_DIM = 512
    DEC_HID_DIM = 512

    attn = models.Attention(ENC_HID_DIM, DEC_HID_DIM)
    enc  = models.AttentionEncoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT)
    dec  = models.AttentionDecoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT, attn)
    model = models.AttentionSeq2Seq(enc, dec, device).to(device)

elif MODEL_TYPE == 'recurrent':
    HID_DIM = 1024
    N_LAYERS = 2

    enc = models.Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
    dec = models.Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)
    model = models.Seq2Seq(enc, dec, device).to(device)

    model.apply(xavier_init_weights)

In [25]:
# loss function
TRG_PAD_IDX = tr_vocab.stoi[tr_field.pad_token]
loss_fn = nn.CrossEntropyLoss(ignore_index = TRG_PAD_IDX)

optimizer = optim.Adam(model.parameters(), lr=INITIAL_LR)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=2)  # goal: maximize Dice score

### Train & Eval Loop

In [26]:
# BLEU SCORE
from torchtext.data.metrics import bleu_score

def calculate_bleu(gt_trg, pred_trg):
    bleu_scores = 0
    batch_size = gt_trg.shape[1]
    for i in range(batch_size):
        gt_sentence   = gt_trg[:, i]
        pred_sentence = pred_trg[:, i, :]

        gt_sentence   = [tr_vocab.itos[token_id] for token_id in gt_sentence]
        # greedy decoding
        pred_sentence = [tr_vocab.itos[torch.argmax(output_distr)] for output_distr in pred_sentence]

        bleu_score = nltk.translate.bleu_score.sentence_bleu([gt_sentence], pred_sentence, weights = [0.5, 0.5])
        bleu_scores += bleu_score

    return bleu_scores / batch_size

In [27]:
if TRAIN:
    best_valid_loss = 999999
    plateau_counter = 0

    for epoch in range(MAX_EPOCH):
        train_loader.create_batches()
        valid_loader.create_batches()

        train_looper = tqdm(enumerate(train_loader.batches), total=len(train_loader), leave = False, position = 0)
        train_looper.set_description("Epoch [{:003}]".format(epoch + 1))

        epoch_train_loss = 0
        epoch_valid_loss = 0
        train_bleu_score = 0
        valid_bleu_score = 0

        #train
        model.train()
        for i, batch in train_looper:
            src, trg = pad_batch(batch)
            src = src.to(device)
            trg = trg.to(device)

            optimizer.zero_grad()

            output = model(src, trg)
            #trg = [trg len, batch size]
            #output = [trg len, batch size, output dim]

            bleu_score = calculate_bleu(trg, output)
            train_bleu_score += bleu_score

            output_dim = output.shape[-1]
            output = output[1:].view(-1, output_dim)
            trg = trg[1:].view(-1)

            #trg = [(trg len - 1) * batch size]
            #output = [(trg len - 1) * batch size, output dim]

            loss = loss_fn(output, trg)
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), CLIP)

            optimizer.step()
            epoch_train_loss += loss.item()
            train_looper.set_postfix(loss=loss.detach().item(), bleu = bleu_score, ppl= math.exp(loss))

        #evaluate
        model.eval()
        with torch.no_grad():
            for _, (batch) in enumerate(valid_loader.batches):
                src, trg = pad_batch(batch)
                src = src.to(device)
                trg = trg.to(device)

                output = model(src, trg, 0) #turn off teacher forcing

                bleu_score = calculate_bleu(trg, output)
                valid_bleu_score += bleu_score
                
                output = output[1:].view(-1, output.shape[-1])
                trg = trg[1:].view(-1)

                loss = loss_fn(output, trg)
                epoch_valid_loss += loss.item()


        epoch_train_loss = epoch_train_loss / len(train_loader)
        epoch_valid_loss = epoch_valid_loss / len(valid_loader)
        train_bleu_score = train_bleu_score / len(train_loader)
        valid_bleu_score = valid_bleu_score / len(valid_loader)

        scheduler.step(epoch_valid_loss)

        print("Epoch: {}, TrainLoss: {:.2f}, ValidLoss : {:.2f}, TrainBleu: \
              {:2f}, ValidBleu: {:2f}, lr: {}".format(epoch + 1, epoch_train_loss, 
                                                      epoch_valid_loss, train_bleu_score, valid_bleu_score,
                                                      optimizer.param_groups[0]['lr']))    

        # checkpoint
        #if epoch_valid_loss < best_valid_loss:
        if True:
            plateau_counter = 0
            best_valid_loss = epoch_valid_loss
            checkpoint_path = 'model_checkpoints/' + str(MODEL_TYPE) + "_model.pkl"
            print("Saving {} at EPOCH: {}".format(checkpoint_path, epoch + 1))
            save_checkpoint(model, optimizer, epoch_valid_loss, (epoch + 1), checkpoint_path)
        else:
            plateau_counter += 1
            if plateau_counter > 5:
                print("Early stopping...")
                #break

The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
                                                                                                      

Epoch: 1, TrainLoss: 5.68, ValidLoss : 5.24, TrainBleu:               0.094153, ValidBleu: 0.156235, lr: 0.001
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 1


                                                                                                   

Epoch: 2, TrainLoss: 3.53, ValidLoss : 4.23, TrainBleu:               0.271857, ValidBleu: 0.281767, lr: 0.001
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 2


                                                                                                 

Epoch: 3, TrainLoss: 2.36, ValidLoss : 3.93, TrainBleu:               0.448526, ValidBleu: 0.327493, lr: 0.001
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 3


                                                                                                 

Epoch: 4, TrainLoss: 1.86, ValidLoss : 3.81, TrainBleu:               0.522694, ValidBleu: 0.359017, lr: 0.0001
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 4


                                                                                                  

Epoch: 5, TrainLoss: 1.51, ValidLoss : 3.74, TrainBleu:               0.583258, ValidBleu: 0.384174, lr: 0.0001
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 5


                                                                                                  

Epoch: 6, TrainLoss: 1.43, ValidLoss : 3.72, TrainBleu:               0.600685, ValidBleu: 0.387930, lr: 0.0001
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 6


                                                                                                  

Epoch: 7, TrainLoss: 1.36, ValidLoss : 3.71, TrainBleu:               0.610438, ValidBleu: 0.395118, lr: 1e-05
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 7


                                                                                                  

Epoch: 8, TrainLoss: 1.31, ValidLoss : 3.71, TrainBleu:               0.618038, ValidBleu: 0.396841, lr: 1e-05
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 8


                                                                                                  

Epoch: 9, TrainLoss: 1.29, ValidLoss : 3.71, TrainBleu:               0.618863, ValidBleu: 0.396929, lr: 1e-05
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 9


                                                                                                  

Epoch: 10, TrainLoss: 1.29, ValidLoss : 3.71, TrainBleu:               0.619489, ValidBleu: 0.396490, lr: 1.0000000000000002e-06
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 10


                                                                                                  

Epoch: 11, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.621212, ValidBleu: 0.396663, lr: 1.0000000000000002e-06
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 11


                                                                                                  

Epoch: 12, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.620130, ValidBleu: 0.396861, lr: 1.0000000000000002e-06
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 12


                                                                                                  

Epoch: 13, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.621564, ValidBleu: 0.397497, lr: 1.0000000000000002e-07
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 13


                                                                                                  

Epoch: 14, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.621800, ValidBleu: 0.397383, lr: 1.0000000000000002e-07
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 14


                                                                                                  

Epoch: 15, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.620399, ValidBleu: 0.397383, lr: 1.0000000000000002e-07
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 15


                                                                                                  

Epoch: 16, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.620988, ValidBleu: 0.397384, lr: 1.0000000000000004e-08
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 16


                                                                                                  

Epoch: 17, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.621196, ValidBleu: 0.397384, lr: 1.0000000000000004e-08
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 17


                                                                                                  

Epoch: 18, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.619934, ValidBleu: 0.397384, lr: 1.0000000000000004e-08
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 18


                                                                                                  

Epoch: 19, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.620584, ValidBleu: 0.397384, lr: 1.0000000000000004e-08
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 19


                                                                                                  

Epoch: 20, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.621233, ValidBleu: 0.397384, lr: 1.0000000000000004e-08
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 20


                                                                                                  

Epoch: 21, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.620080, ValidBleu: 0.397384, lr: 1.0000000000000004e-08
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 21


                                                                                                  

Epoch: 22, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.619876, ValidBleu: 0.397384, lr: 1.0000000000000004e-08
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 22


                                                                                                  

Epoch: 23, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.620879, ValidBleu: 0.397384, lr: 1.0000000000000004e-08
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 23


                                                                                                  

Epoch: 24, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.621777, ValidBleu: 0.397384, lr: 1.0000000000000004e-08
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 24


                                                                                                  

Epoch: 25, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.620689, ValidBleu: 0.397384, lr: 1.0000000000000004e-08
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 25


                                                                                                  

Epoch: 26, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.619976, ValidBleu: 0.397384, lr: 1.0000000000000004e-08
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 26


                                                                                                  

Epoch: 27, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.621437, ValidBleu: 0.397384, lr: 1.0000000000000004e-08
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 27


                                                                                                  

Epoch: 28, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.620296, ValidBleu: 0.397384, lr: 1.0000000000000004e-08
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 28


                                                                                                  

Epoch: 29, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.621645, ValidBleu: 0.397384, lr: 1.0000000000000004e-08
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 29


                                                                                                  

Epoch: 30, TrainLoss: 1.28, ValidLoss : 3.71, TrainBleu:               0.621256, ValidBleu: 0.397384, lr: 1.0000000000000004e-08
Saving model_checkpoints/recurrent_model.pkl at EPOCH: 30


## Evaluate

In [28]:
df = pd.read_csv(os.path.join(DATAFRAME_PATH, 'combined-iwslt-news.csv'))

test_df = df[df.split == 'test']
test_df = test_df.reset_index(drop = True)

In [29]:
en_test_preprocessed_text = test_df['en'].apply(lambda x: en_field.preprocess(x))
tr_test_preprocessed_text = test_df['tr'].apply(lambda x: tr_field.preprocess(x))

test_dataset = get_corpora_dataset(en_test_preprocessed_text, tr_test_preprocessed_text, en_vocab, tr_vocab)
test_loader = DataLoader(test_dataset, batch_size=12, shuffle=False, collate_fn=generate_batch)

In [30]:
load_checkpoint(model, 'model_checkpoints/' + str(MODEL_TYPE) + '_model.pkl')

(30, 3.710268461397694)

In [31]:
# BLEU SCORE
from torchtext.data.metrics import bleu_score

def get_bleu_batch(gt_trg, pred_trg):
    batch_size = gt_trg.shape[1]

    gt_sentences = []
    pred_sentences = []
    bleu_score_list = []

    for i in range(batch_size):
        gt_sentence   = gt_trg[:, i]
        pred_sentence = pred_trg[:, i, :]

        gt_sentence   = [tr_vocab.itos[token_id] for token_id in gt_sentence]
        # greedy decoding
        pred_sentence = [tr_vocab.itos[torch.argmax(output_distr)] for output_distr in pred_sentence]

        bleu_score = nltk.translate.bleu_score.sentence_bleu([gt_sentence], pred_sentence, weights = [0.5, 0.5])

        gt_sentences.append(gt_sentence)
        pred_sentences.append(pred_sentence)
        bleu_score_list.append(bleu_score)

    return gt_sentences, pred_sentences, bleu_score_list

In [32]:
test_loss = 0
#evaluate
model.eval()
with torch.no_grad():
    bleu_scores = []
    gt_sentences_list = []
    pred_sentences_list = []

    for _, (src, trg) in enumerate(test_loader):
        src = src.to(device)
        trg = trg.to(device)

        output = model(src, trg, 0) #turn off teacher forcing

        gt_sentences, pred_sentences, bleu_score_list = get_bleu_batch(trg, output)
        bleu_scores.extend(bleu_score_list)
        gt_sentences_list.extend(gt_sentences)
        pred_sentences_list.extend(pred_sentences)

        output = output[1:].view(-1, output.shape[-1])
        trg = trg[1:].view(-1)

        loss = loss_fn(output, trg)
        test_loss += loss.item()

In [33]:
indx = 5

In [34]:
' '.join(gt_sentences_list[indx])

'<sos> <unk> aylığı bağlama şartı olan 10 yıl <unk> 1800 günü olmayan hiç kimse emekli olamaz mı <unk> <eos> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad>'

In [35]:
' '.join(pred_sentences_list[indx])

'<unk> <unk>   <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk>   '

In [36]:
test_loss = test_loss / len(test_loader)

In [37]:
print('Test Loss: {:.3f}, Test PPL: {:7.3f}, Test Bleu: {:.3f}'.format(test_loss, math.exp(test_loss), np.mean(bleu_scores)))

Test Loss: 5.286, Test PPL: 197.480, Test Bleu: 0.121
