# Evaluation

## Open dataset

In [1]:
from pathlib import Path
from constants import *

def load_token_ids(path):
    """Read a text file of integer token ids, one sentence per line.

    Args:
        path: Location of the file to read.

    Returns:
        A list with one entry per line of the file; each entry is the list of
        ints found on that line (an empty list for a blank line).
    """
    with open(path, 'r') as f:
        # split() with no argument discards the trailing space/newline itself,
        # so every id is kept even when a line has no trailing space; the former
        # split(' ')[:-1] silently dropped the last id in that case.
        return [[int(token) for token in line.split()] for line in f]


# Source-side token ids
source_dataset = load_token_ids(Path(DATASET_DIR, 'wmt14_en_test.src'))

# Target-side token ids
target_dataset = load_token_ids(Path(DATASET_DIR, 'wmt14_fr_test.trg'))


## Create the model

In [2]:
from transformers import GPT2Tokenizer

# The tokenizer files are read from a directory inside the project.
tokenizer_dir = './models/tokenizer'
tokenizer = GPT2Tokenizer.from_pretrained(tokenizer_dir)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from models import Transformer
import json

# Hyperparameters are read from config.json; the second argument of each
# .get() is the fallback used when the key is absent.
with open("config.json") as f:
    config = json.load(f)

embed_dim = config.get('EMBED_DIM', 256)
num_encoder_layers = config.get('NUM_ENCODER_LAYERS', 2)
num_decoder_layers = config.get('NUM_DECODER_LAYERS', 2)
n_heads = config.get('NUM_ATTENTION_HEADS', 4)
ffn_hidden_dim = config.get('FFN_HIDDEN_DIM', 512)

# `tokenizer` is already loaded by the cell above, so it is not read from
# disk a second time here.
vocab_size = tokenizer.vocab_size
special_tokens = len(tokenizer.special_tokens_map)
# Both vocabularies are sized as the base vocabulary plus the number of
# entries in special_tokens_map.
# NOTE(review): presumably this has to match the size used at training time — confirm.
total_vocab_size = vocab_size + special_tokens

# Build the model.
# NOTE(review): no checkpoint / state_dict loading is visible in this notebook;
# unless Transformer restores weights internally, the model below is freshly
# initialised — confirm before trusting its predictions.
model = Transformer(
    embed_dim=embed_dim,
    src_vocab_size=total_vocab_size,
    trg_vocab_size=total_vocab_size,
    num_layers_enc=num_encoder_layers,
    num_layers_dec=num_decoder_layers,
    n_head=n_heads,
    hidden_size=ffn_hidden_dim,
)

## Make a prediction

In [4]:
import torch

In [5]:
# Use the first test sentence, wrapped in a one-element list, and display it.
first_sentence = source_dataset[0]
source = [first_sentence]
source

[[50257, 49738, 12754, 17330, 5013, 15903, 3827, 21555, 4265, 50258]]

In [6]:
# `tokenizer` is already in the kernel from the cells above; reloading it from
# disk again is unnecessary.
pad_idx = tokenizer.pad_token_id

In [28]:
# Prediction: greedy-decode `source` and turn the result back into text.
max_seq_len = 512

# as_tensor accepts both the original nested list and an existing LongTensor,
# so re-running this cell does not depend on what `source` currently holds.
source = torch.as_tensor(source, dtype=torch.long)
# All-ones mask with one entry per source position.
source_mask = torch.ones(1, 1, source.size(1))

# Inference only: put the model in evaluation mode and skip gradient tracking.
model.eval()  # assumes Transformer is a torch.nn.Module
with torch.no_grad():
    prediction = model.generate_greedy(
        source,
        source_mask,
        max_seq_len,
        tokenizer.bos_token_id,
        tokenizer.eos_token_id,
    )

# Decode the first generated sequence, then drop any space-separated word that
# is exactly the BOS or EOS marker string.
txt_prediction = tokenizer.decode(prediction[0])
special_markers = (tokenizer.bos_token, tokenizer.eos_token)
txt_prediction = " ".join(
    word for word in txt_prediction.split(" ") if word not in special_markers
)

## Evaluation

In [1]:
# Open target
from pathlib import Path
from constants import *

with open(Path(DATASET_DIR, 'wmt14_fr_test.trg'), 'r') as f:
    # One sentence per line, ids separated by whitespace. split() with no
    # argument discards the trailing space/newline itself, so the last id is
    # kept even when a line has no trailing space (split(' ')[:-1] dropped it).
    target_dataset = [[int(token) for token in line.split()] for line in f]

In [2]:
from transformers import GPT2Tokenizer

# The tokenizer files are read from a directory inside the project.
tokenizer_dir = './models/tokenizer'
tokenizer = GPT2Tokenizer.from_pretrained(tokenizer_dir)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Decode every reference back to text, split it on single spaces, and leave
# out any piece that is exactly the BOS or EOS marker string.
txt_targets = []
for target_ids in target_dataset:
    words = tokenizer.decode(target_ids).split(' ')
    kept_words = [
        w for w in words
        if not (w == tokenizer.bos_token or w == tokenizer.eos_token)
    ]
    txt_targets.append(kept_words)


In [6]:
# Directory holding the saved model translations.
PREDICTIONS_DIR = "./predictions/"

predictions_file = PREDICTIONS_DIR + "wmt14_en_fr_llm.txt"
with open(predictions_file, 'r') as f:
    # Keep every raw line, line ending included.
    txt_predictions = list(f)

In [12]:
from nltk.translate.bleu_score import sentence_bleu
import numpy as np

# Score each prediction line against the reference at the same index.
# The prediction is split on single spaces and its final piece is discarded
# before scoring. The printed value is the mean of per-sentence BLEU scores,
# not a corpus-level BLEU.
# NOTE(review): the saved output shows a mean of exactly 1.0 — worth confirming
# that the predictions file is not simply a copy of the references.
bleu_scores = [
    sentence_bleu([txt_targets[idx]], line.split(" ")[:-1])
    for idx, line in enumerate(txt_predictions)
]

print("BLEU score: ", np.mean(bleu_scores))

BLEU score:  1.0
