In [15]:
import numpy as np,torch

from translation_machine import dataset_mod,sentence_mod

# Full sentence-pair corpus read from the text file.
whole_dataset = dataset_mod.DatasetFromTxt("../data/french_english_dataset/fra.txt")

# Row indices saved by the dataset-splitting step (presumably the subset the
# model was overfitted on, going by the file name — confirm).
idxs = np.load("../dataset_splitting/idx_overfitting.npy")

# Keep only those rows, then wrap them so items come back as
# (EnglishSentence, FrenchSentence) objects.
_overfit_subset = torch.utils.data.Subset(whole_dataset, idxs)
dataset_overfitted_on = dataset_mod.SentenceDataSet(
    _overfit_subset,
    sentence_mod.EnglishSentence,
    sentence_mod.FrenchSentence,
)


In [16]:
from translation_machine import model_mod,model_bidirectionnal_mod


path = "../models/sequence_translator_extended_overfitted.pth"

# SECURITY NOTE: torch.load unpickles the file, which can execute arbitrary
# code — only load checkpoints that come from a trusted source.
# map_location="cpu" keeps the checkpoint loadable on a machine without a GPU
# even if it was saved from one; load_state_dict later copies the values into
# the model's own parameters.
back_up = torch.load(path, map_location="cpu")

# The checkpoint stores the constructor keyword arguments next to the weights,
# so the architecture is rebuilt exactly as it was saved.
model_inputs = back_up["model_inputs"]
sequence_translator = model_bidirectionnal_mod.SequenceTranslator(**model_inputs)


In [17]:
from pathlib import Path

# Copy the saved weights from the checkpoint dict (`back_up`, loaded in the
# previous cell) into the freshly built model.
sequence_translator.load_state_dict(back_up["model_params"])

# Switch to evaluation mode; as the last expression of the cell this also
# displays the module summary.
sequence_translator.eval()

SequenceTranslator(
  (src_embedding_layer): Embedding(14087, 128)
  (dst_embedding_layer): Embedding(22815, 128)
  (encoder): LSTM(128, 512, batch_first=True, bidirectional=True)
  (decoder): LSTM(128, 1024, batch_first=True)
  (linear_layer): Linear(in_features=1024, out_features=22815, bias=True)
)

In [278]:
from translation_machine import translator_mod

# Wrap the loaded model in the project's Translator helper; later cells call
# `translator(...)` on an English sentence object to get a prediction.
translator = translator_mod.Translator(sequence_translator)

In [279]:
# Take the first (English, French) pair of the subset and display both
# sentence objects.
english_sentence,french_sentence = dataset_overfitted_on[0]
english_sentence,french_sentence

(<translation_machine.sentence_mod._create_sentence_type.<locals>.Sentence at 0x7fd3dcf41b70>,
 <translation_machine.sentence_mod._create_sentence_type.<locals>.Sentence at 0x7fd3dcf417b0>)

In [280]:
# Show the token lists of that pair: French first, then English.
french_sentence.as_words,english_sentence.as_words

(['Va', '!'], ['Go', '.'])

In [281]:
# Translate every English sentence of the subset and keep the French
# reference next to each prediction.
preds_and_targets = [
    (translator(source), reference)
    for source, reference in dataset_overfitted_on
]

# Transpose the list of pairs into two parallel tuples.
preds, targets = zip(*preds_and_targets)

In [420]:
from translation_machine.metrics import bleu_score_nltk_mod

# Project BLEU metric object; it is fed through `update()` and read through
# `compute()` in the cells that follow.
ff = bleu_score_nltk_mod.BleuSoreNLTK()

In [421]:
# Token lists of the predictions and of the references, in matching order.
a = [prediction.as_words for prediction in preds]
b = [reference.as_words for reference in targets]

# Feed both lists to the metric (predictions first, references second).
ff.update(a, b)


In [422]:
# Read back the score for everything passed to `ff.update()` so far
# (presumably a corpus-level BLEU — confirm in bleu_score_nltk_mod).
ff.compute()

0.38150038058561964

In [423]:
# `corpus_bleu` is not imported in any of the cells shown, so import it here;
# otherwise this cell raises NameError on a fresh kernel.
from nltk.translate.bleu_score import corpus_bleu

# corpus_bleu takes, for each hypothesis, a *list* of references; there is a
# single reference per sentence here, hence the extra list level.
targets_1 = [[el.as_words] for el in targets]
preds_1 = [el.as_words for el in preds]

# The weights put all the mass on the second n-gram order, i.e. only bigram
# precision contributes to the score.
corpus_bleu(targets_1,preds_1,smoothing_function=ff.smoothing_function,weights=[0.0,1.0,0.0])

0.6496296296296297

In [385]:
# Inspect the tokenised predictions built from `preds`.
preds_1

[['Va', '!'],
 ['Salut', '.'],
 ['Salut', '.'],
 ['Courez', '!'],
 ['Courez', '!'],
 ['Qui', '?'],
 ['Ça', 'alors', '!'],
 ['Au', 'feu', '!'],
 ['À', 'l', "'", 'aide', '!'],
 ['Saute', '.']]

In [374]:
# Show the references next to the predictions.
# NOTE(review): this cell's execution count (374) is lower than that of the
# cell that builds these lists (423), so the recorded output may come from an
# earlier kernel state — re-run after that cell.
targets_1,preds_1

([[['Va', '!']]], [['Va', '!']])

In [379]:
from nltk.translate.bleu_score import sentence_bleu
# Minimal sanity check: the candidate is identical to its only reference.
reference = [['Va', '!']]
candidate = ['Va', '!']

# NOTE(review): the value recorded for this exact match is ~1.5e-154, not 1.0.
# Presumably this is because the default weights cover 1- to 4-grams and a
# two-token sentence has no 3-/4-grams, with no smoothing applied — confirm
# against the NLTK documentation.
score = sentence_bleu(references=reference, hypothesis=candidate)

In [380]:
# Display the sentence-level BLEU computed in the previous cell.
score

1.491668146240062e-154

In [438]:
# Display the string-form inputs used for the torchmetrics BLEU.
# NOTE(review): both names are assigned in the cell with execution count 447,
# which appears further down; on a fresh kernel that cell must run first.
references_for_torch_metric,candidate_for_torch_metrics

([['the quick brown fox jumped over the lazy dog']],
 ['the quick brown fox jumped over the lazy'])

In [447]:

from ignite.metrics.nlp import Bleu
from nltk.translate.bleu_score import sentence_bleu
from torchmetrics.text.bleu import BLEUScore


# Toy pair: the candidate is the reference sentence without its last token.
references = [
    ['the', 'quick', 'brown', 'fox', 'jumped', 'over', 'the', 'lazy', 'dog'],
]
candidate = ['the', 'quick', 'brown', 'fox', 'jumped', 'over', 'the', 'lazy']

# --- NLTK: list of tokenised references, one tokenised candidate ---
score = sentence_bleu(references, candidate)
print(score)

# --- torchmetrics: sentences are passed as whitespace-joined strings ---
candidate_for_torch_metrics = [" ".join(candidate)]
references_for_torch_metric = [[" ".join(tokens) for tokens in references]]
print(float(BLEUScore()(candidate_for_torch_metrics, references_for_torch_metric)))

# --- ignite: same tokens, wrapped in one more list level (a corpus of one) ---
candidate_for_ignite = [candidate]
references_for_ignite = [references]

bleu = Bleu()
bleu.reset()
bleu.update((candidate_for_ignite, references_for_ignite))
print(float(bleu.compute()))

# NOTE(review): the three values below do not match the output recorded for
# this cell; they look like results kept from an earlier run — confirm or drop.
# 0.7102992180127422
# 0.0
# 0.0

0.8824969025845955
0.8824968934059143
0.8824969025845955


In [476]:
# Display the nested token lists passed to the ignite BLEU metric
# (references carry one more nesting level than candidates).
references_for_ignite,candidate_for_ignite

([[['the', 'quick', 'brown', 'fox', 'jumped', 'over', 'the', 'lazy', 'dog']]],
 [['the', 'quick', 'brown', 'fox', 'jumped', 'over', 'the', 'lazy']])

In [448]:
# `EnglishSentence` is never bound as a bare name in the cells shown; reach it
# through `sentence_mod`, the same way the dataset cell does, so this cell does
# not raise NameError on a fresh kernel.
# NOTE(review): assumes `vocab` maps a token string to its integer id — confirm.
_english_vocab = sentence_mod.EnglishSentence.vocab

# Same nesting as the token-level inputs, with every token replaced by its
# vocabulary entry.
candidate_for_ignite_as_int = [
    [_english_vocab[token] for token in sentence]
    for sentence in candidate_for_ignite
]
references_for_ignite_as_int = [
    [[_english_vocab[token] for token in reference] for reference in reference_group]
    for reference_group in references_for_ignite
]


[['the', 'quick', 'brown', 'fox', 'jumped', 'over', 'the', 'lazy']]

In [474]:
# Repeat the ignite BLEU computation on the integer-encoded sentences; the
# recorded result equals the token-level one.
bleu = Bleu()
bleu.reset()
bleu.update((candidate_for_ignite_as_int, references_for_ignite_as_int))

_bleu_on_ids = float(bleu.compute())
print(_bleu_on_ids)

0.8824969025845955
